From: Bruce Richardson
To: dev@dpdk.org
Date: Tue, 12 May 2015 18:05:00 +0100
Message-Id: <1431450315-13179-5-git-send-email-bruce.richardson@intel.com>
In-Reply-To: <1431450315-13179-1-git-send-email-bruce.richardson@intel.com>
Subject: [dpdk-dev] [PATCH 04/19] bond: Move bonded ethdev pmd to drivers

Move bonded ethdev pmd to drivers

Signed-off-by: Bruce Richardson
---
 doc/api/doxy-api.conf | 2 +-
 drivers/Makefile | 2 +-
 drivers/bonding/Makefile | 68 +
 drivers/bonding/rte_eth_bond.h | 366 ++++
 drivers/bonding/rte_eth_bond_8023ad.c | 1216 +++++++++++
 drivers/bonding/rte_eth_bond_8023ad.h | 222 ++
 drivers/bonding/rte_eth_bond_8023ad_private.h | 308 +++
 drivers/bonding/rte_eth_bond_alb.c | 287 +++
 drivers/bonding/rte_eth_bond_alb.h | 142 ++
 drivers/bonding/rte_eth_bond_api.c | 840 ++++++++
 drivers/bonding/rte_eth_bond_args.c | 278 +++
 drivers/bonding/rte_eth_bond_pmd.c | 2269 +++++++++++++++++++++
 drivers/bonding/rte_eth_bond_private.h | 287 +++
 drivers/bonding/rte_eth_bond_version.map | 22 +
 lib/Makefile | 1 -
 lib/librte_pmd_bond/Makefile | 68 -
 lib/librte_pmd_bond/rte_eth_bond.h | 366 ----
 lib/librte_pmd_bond/rte_eth_bond_8023ad.c | 1216 -----------
 lib/librte_pmd_bond/rte_eth_bond_8023ad.h | 222 --
 lib/librte_pmd_bond/rte_eth_bond_8023ad_private.h | 308 ---
 lib/librte_pmd_bond/rte_eth_bond_alb.c | 287 ---
 lib/librte_pmd_bond/rte_eth_bond_alb.h | 142 --
 lib/librte_pmd_bond/rte_eth_bond_api.c | 840 --------
 lib/librte_pmd_bond/rte_eth_bond_args.c | 278 ---
 lib/librte_pmd_bond/rte_eth_bond_pmd.c | 2269 ---------------------
 lib/librte_pmd_bond/rte_eth_bond_private.h | 287 ---
 lib/librte_pmd_bond/rte_eth_bond_version.map | 22 -
 27 files changed, 6307 insertions(+), 6308 deletions(-)
 create mode 100644
drivers/bonding/Makefile create mode 100644 drivers/bonding/rte_eth_bond.h create mode 100644 drivers/bonding/rte_eth_bond_8023ad.c create mode 100644 drivers/bonding/rte_eth_bond_8023ad.h create mode 100644 drivers/bonding/rte_eth_bond_8023ad_private.h create mode 100644 drivers/bonding/rte_eth_bond_alb.c create mode 100644 drivers/bonding/rte_eth_bond_alb.h create mode 100644 drivers/bonding/rte_eth_bond_api.c create mode 100644 drivers/bonding/rte_eth_bond_args.c create mode 100644 drivers/bonding/rte_eth_bond_pmd.c create mode 100644 drivers/bonding/rte_eth_bond_private.h create mode 100644 drivers/bonding/rte_eth_bond_version.map delete mode 100644 lib/librte_pmd_bond/Makefile delete mode 100644 lib/librte_pmd_bond/rte_eth_bond.h delete mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.c delete mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.h delete mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad_private.h delete mode 100644 lib/librte_pmd_bond/rte_eth_bond_alb.c delete mode 100644 lib/librte_pmd_bond/rte_eth_bond_alb.h delete mode 100644 lib/librte_pmd_bond/rte_eth_bond_api.c delete mode 100644 lib/librte_pmd_bond/rte_eth_bond_args.c delete mode 100644 lib/librte_pmd_bond/rte_eth_bond_pmd.c delete mode 100644 lib/librte_pmd_bond/rte_eth_bond_private.h delete mode 100644 lib/librte_pmd_bond/rte_eth_bond_version.map diff --git a/doc/api/doxy-api.conf b/doc/api/doxy-api.conf index 50b0105..813edf1 100644 --- a/doc/api/doxy-api.conf +++ b/doc/api/doxy-api.conf @@ -49,7 +49,7 @@ INPUT = doc/api/doxy-api-index.md \ lib/librte_pipeline \ lib/librte_port \ lib/librte_power \ - lib/librte_pmd_bond \ + drivers/bonding \ lib/librte_reorder \ lib/librte_ring \ lib/librte_sched \ diff --git a/drivers/Makefile b/drivers/Makefile index 0ea99c5..40df712 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -32,13 +32,13 @@ include $(RTE_SDK)/mk/rte.vars.mk DIRS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET) += af_packet +DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += bonding #DIRS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += librte_pmd_e1000 #DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_ixgbe #DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += librte_pmd_i40e #DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += librte_pmd_fm10k #DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += librte_pmd_mlx4 #DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += librte_pmd_enic -#DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += librte_pmd_bond #DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += librte_pmd_ring #DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += librte_pmd_pcap #DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += librte_pmd_virtio diff --git a/drivers/bonding/Makefile b/drivers/bonding/Makefile new file mode 100644 index 0000000..83ccce3 --- /dev/null +++ b/drivers/bonding/Makefile @@ -0,0 +1,68 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_pmd_bond.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +EXPORT_MAP := rte_eth_bond_version.map + +LIBABIVER := 1 + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_alb.c + +# +# Export include files +# +SYMLINK-y-include += rte_eth_bond.h +SYMLINK-y-include += rte_eth_bond_8023ad.h + +# this lib depends upon: +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_mbuf +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_ether +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_malloc +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_kvargs + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/drivers/bonding/rte_eth_bond.h b/drivers/bonding/rte_eth_bond.h new file mode 100644 index 0000000..d688fc3 --- /dev/null +++ b/drivers/bonding/rte_eth_bond.h @@ -0,0 +1,366 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ETH_BOND_H_ +#define _RTE_ETH_BOND_H_ + +/** + * @file rte_eth_bond.h + * + * RTE Link Bonding Ethernet Device + * Link Bonding for 1GbE and 10GbE ports to allow the aggregation of multiple + * (slave) NICs into a single logical interface. The bonded device processes + * these interfaces based on the mode of operation specified and supported. + * This implementation supports 4 modes of operation round robin, active backup + * balance and broadcast. Providing redundant links, fault tolerance and/or + * load balancing of network ports + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* Supported modes of operation of link bonding library */ + +#define BONDING_MODE_ROUND_ROBIN (0) +/**< Round Robin (Mode 0). + * In this mode all transmitted packets will be balanced equally across all + * active slaves of the bonded in a round robin fashion. */ +#define BONDING_MODE_ACTIVE_BACKUP (1) +/**< Active Backup (Mode 1). + * In this mode all packets transmitted will be transmitted on the primary + * slave until such point as the primary slave is no longer available and then + * transmitted packets will be sent on the next available slaves. The primary + * slave can be defined by the user but defaults to the first active slave + * available if not specified. */ +#define BONDING_MODE_BALANCE (2) +/**< Balance (Mode 2). + * In this mode all packets transmitted will be balanced across the available + * slaves using one of three available transmit policies - l2, l2+3 or l3+4. + * See BALANCE_XMIT_POLICY macros definitions for further details on transmit + * policies. */ +#define BONDING_MODE_BROADCAST (3) +/**< Broadcast (Mode 3). + * In this mode all transmitted packets will be transmitted on all available + * active slaves of the bonded. */ +#define BONDING_MODE_8023AD (4) +/**< 802.3AD (Mode 4). + * + * This mode provides auto negotiation/configuration + * of peers and well as link status changes monitoring using out of band + * LACP (link aggregation control protocol) messages. For further details of + * LACP specification see the IEEE 802.3ad/802.1AX standards. It is also + * described here + * https://www.kernel.org/doc/Documentation/networking/bonding.txt. + * + * Important Usage Notes: + * - for LACP mode to work the rx/tx burst functions must be invoked + * at least once every 100ms, otherwise the out-of-band LACP messages will not + * be handled with the expected latency and this may cause the link status to be + * incorrectly marked as down or failure to correctly negotiate with peers. + * - For optimal performance during initial handshaking the array of mbufs provided + * to rx_burst should be at least 2 times the slave count size. + * + */ +#define BONDING_MODE_TLB (5) +/**< Adaptive TLB (Mode 5) + * This mode provides an adaptive transmit load balancing. It dynamically + * changes the transmitting slave, according to the computed load. 
Statistics + * are collected in 100ms intervals and scheduled every 10ms */ +#define BONDING_MODE_ALB (6) +/**< Adaptive Load Balancing (Mode 6) + * This mode includes adaptive TLB and receive load balancing (RLB). In RLB the + * bonding driver intercepts ARP replies send by local system and overwrites its + * source MAC address, so that different peers send data to the server on + * different slave interfaces. When local system sends ARP request, it saves IP + * information from it. When ARP reply from that peer is received, its MAC is + * stored, one of slave MACs assigned and ARP reply send to that peer. + */ + +/* Balance Mode Transmit Policies */ +#define BALANCE_XMIT_POLICY_LAYER2 (0) +/**< Layer 2 (Ethernet MAC) */ +#define BALANCE_XMIT_POLICY_LAYER23 (1) +/**< Layer 2+3 (Ethernet MAC + IP Addresses) transmit load balancing */ +#define BALANCE_XMIT_POLICY_LAYER34 (2) +/**< Layer 3+4 (IP Addresses + UDP Ports) transmit load balancing */ + +/** + * Create a bonded rte_eth_dev device + * + * @param name Name of new link bonding device. + * @param mode Mode to initialize bonding device in. + * @param socket_id Socket Id on which to allocate eth_dev resources. + * + * @return + * Port Id of created rte_eth_dev on success, negative value otherwise + */ +int +rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id); + +/** + * Add a rte_eth_dev device as a slave to the bonded device + * + * @param bonded_port_id Port ID of bonded device. + * @param slave_port_id Port ID of slave device. + * + * @return + * 0 on success, negative value otherwise + */ +int +rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id); + +/** + * Remove a slave rte_eth_dev device from the bonded device + * + * @param bonded_port_id Port ID of bonded device. + * @param slave_port_id Port ID of slave device. + * + * @return + * 0 on success, negative value otherwise + */ +int +rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id); + +/** + * Set link bonding mode of bonded device + * + * @param bonded_port_id Port ID of bonded device. + * @param mode Bonding mode to set + * + * @return + * 0 on success, negative value otherwise + */ +int +rte_eth_bond_mode_set(uint8_t bonded_port_id, uint8_t mode); + +/** + * Get link bonding mode of bonded device + * + * @param bonded_port_id Port ID of bonded device. + * + * @return + * link bonding mode on success, negative value otherwise + */ +int +rte_eth_bond_mode_get(uint8_t bonded_port_id); + +/** + * Set slave rte_eth_dev as primary slave of bonded device + * + * @param bonded_port_id Port ID of bonded device. + * @param slave_port_id Port ID of slave device. + * + * @return + * 0 on success, negative value otherwise + */ +int +rte_eth_bond_primary_set(uint8_t bonded_port_id, uint8_t slave_port_id); + +/** + * Get primary slave of bonded device + * + * @param bonded_port_id Port ID of bonded device. 
+ * + * @return + * Port Id of primary slave on success, -1 on failure + */ +int +rte_eth_bond_primary_get(uint8_t bonded_port_id); + +/** + * Populate an array with list of the slaves port id's of the bonded device + * + * @param bonded_port_id Port ID of bonded eth_dev to interrogate + * @param slaves Array to be populated with the current active slaves + * @param len Length of slaves array + * + * @return + * Number of slaves associated with bonded device on success, + * negative value otherwise + */ +int +rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len); + +/** + * Populate an array with list of the active slaves port id's of the bonded + * device. + * + * @param bonded_port_id Port ID of bonded eth_dev to interrogate + * @param slaves Array to be populated with the current active slaves + * @param len Length of slaves array + * + * @return + * Number of active slaves associated with bonded device on success, + * negative value otherwise + */ +int +rte_eth_bond_active_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], + uint8_t len); + +/** + * Set explicit MAC address to use on bonded device and it's slaves. + * + * @param bonded_port_id Port ID of bonded device. + * @param mac_addr MAC Address to use on bonded device overriding + * slaves MAC addresses + * + * @return + * 0 on success, negative value otherwise + */ +int +rte_eth_bond_mac_address_set(uint8_t bonded_port_id, + struct ether_addr *mac_addr); + +/** + * Reset bonded device to use MAC from primary slave on bonded device and it's + * slaves. + * + * @param bonded_port_id Port ID of bonded device. + * + * @return + * 0 on success, negative value otherwise + */ +int +rte_eth_bond_mac_address_reset(uint8_t bonded_port_id); + +/** + * Set the transmit policy for bonded device to use when it is operating in + * balance mode, this parameter is otherwise ignored in other modes of + * operation. + * + * @param bonded_port_id Port ID of bonded device. + * @param policy Balance mode transmission policy. + * + * @return + * 0 on success, negative value otherwise. + */ +int +rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy); + +/** + * Get the transmit policy set on bonded device for balance mode operation + * + * @param bonded_port_id Port ID of bonded device. + * + * @return + * Balance transmit policy on success, negative value otherwise. + */ +int +rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id); + +/** + * Set the link monitoring frequency (in ms) for monitoring the link status of + * slave devices + * + * @param bonded_port_id Port ID of bonded device. + * @param internal_ms Monitoring interval in milliseconds + * + * @return + * 0 on success, negative value otherwise. + */ + +int +rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms); + +/** + * Get the current link monitoring frequency (in ms) for monitoring of the link + * status of slave devices + * + * @param bonded_port_id Port ID of bonded device. + * + * @return + * Monitoring interval on success, negative value otherwise. + */ +int +rte_eth_bond_link_monitoring_get(uint8_t bonded_port_id); + + +/** + * Set the period in milliseconds for delaying the disabling of a bonded link + * when the link down status has been detected + * + * @param bonded_port_id Port ID of bonded device. + * @param delay_ms Delay period in milliseconds. + * + * @return + * 0 on success, negative value otherwise. 
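For illustration only, a minimal sketch of how an application might drive the API declared in this header. The helper name bond_example, the device name "bond0", the port numbers, queue id and burst size are assumptions, and the usual rte_eth_dev_configure()/queue setup/rte_eth_dev_start() calls on the bonded port are elided; the polling loop reflects the mode 4 usage note above (burst functions invoked at least every 100ms).

	#include <rte_ethdev.h>
	#include <rte_mbuf.h>
	#include <rte_eth_bond.h>

	/* Illustrative only: create an 802.3ad bond over two already-probed
	 * slave ports and poll it forever. */
	static void
	bond_example(uint8_t slave0, uint8_t slave1)
	{
		struct rte_mbuf *pkts[32];	/* >= 2 * slave count, per the notes above */
		int bond_port;
		uint16_t nb, sent;

		bond_port = rte_eth_bond_create("bond0", BONDING_MODE_8023AD, 0);
		if (bond_port < 0)
			return;

		rte_eth_bond_slave_add(bond_port, slave0);
		rte_eth_bond_slave_add(bond_port, slave1);

		/* ... rte_eth_dev_configure(), queue setup, rte_eth_dev_start() ... */

		/* In mode 4 the rx/tx burst functions must run at least every
		 * 100ms, so keep polling even when there is no traffic. */
		for (;;) {
			nb = rte_eth_rx_burst(bond_port, 0, pkts, 32);
			sent = rte_eth_tx_burst(bond_port, 0, pkts, nb);
			while (sent < nb)	/* free anything not transmitted */
				rte_pktmbuf_free(pkts[sent++]);
		}
	}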
+ */ +int +rte_eth_bond_link_down_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms); + +/** + * Get the period in milliseconds set for delaying the disabling of a bonded + * link when the link down status has been detected + * + * @param bonded_port_id Port ID of bonded device. + * + * @return + * Delay period on success, negative value otherwise. + */ +int +rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id); + +/** + * Set the period in milliseconds for delaying the enabling of a bonded link + * when the link up status has been detected + * + * @param bonded_port_id Port ID of bonded device. + * @param delay_ms Delay period in milliseconds. + * + * @return + * 0 on success, negative value otherwise. + */ +int +rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms); + +/** + * Get the period in milliseconds set for delaying the enabling of a bonded + * link when the link up status has been detected + * + * @param bonded_port_id Port ID of bonded device. + * + * @return + * Delay period on success, negative value otherwise. + */ +int +rte_eth_bond_link_up_prop_delay_get(uint8_t bonded_port_id); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/drivers/bonding/rte_eth_bond_8023ad.c b/drivers/bonding/rte_eth_bond_8023ad.c new file mode 100644 index 0000000..97a828e --- /dev/null +++ b/drivers/bonding/rte_eth_bond_8023ad.c @@ -0,0 +1,1216 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include "rte_eth_bond_private.h" + +#ifdef RTE_LIBRTE_BOND_DEBUG_8023AD +#define MODE4_DEBUG(fmt, ...) 
RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \ + bond_dbg_get_time_diff_ms(), slave_id, \ + __func__, ##__VA_ARGS__) + +static uint64_t start_time; + +static unsigned +bond_dbg_get_time_diff_ms(void) +{ + uint64_t now; + + now = rte_rdtsc(); + if (start_time == 0) + start_time = now; + + return ((now - start_time) * 1000) / rte_get_tsc_hz(); +} + +static void +bond_print_lacp(struct lacpdu *l) +{ + char a_address[18]; + char p_address[18]; + char a_state[256] = { 0 }; + char p_state[256] = { 0 }; + + static const char * const state_labels[] = { + "ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP" + }; + + int a_len = 0; + int p_len = 0; + uint8_t i; + uint8_t *addr; + + addr = l->actor.port_params.system.addr_bytes; + snprintf(a_address, sizeof(a_address), "%02X:%02X:%02X:%02X:%02X:%02X", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); + + addr = l->partner.port_params.system.addr_bytes; + snprintf(p_address, sizeof(p_address), "%02X:%02X:%02X:%02X:%02X:%02X", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); + + for (i = 0; i < 8; i++) { + if ((l->actor.state >> i) & 1) { + a_len += snprintf(&a_state[a_len], RTE_DIM(a_state) - a_len, "%s ", + state_labels[i]); + } + + if ((l->partner.state >> i) & 1) { + p_len += snprintf(&p_state[p_len], RTE_DIM(p_state) - p_len, "%s ", + state_labels[i]); + } + } + + if (a_len && a_state[a_len-1] == ' ') + a_state[a_len-1] = '\0'; + + if (p_len && p_state[p_len-1] == ' ') + p_state[p_len-1] = '\0'; + + RTE_LOG(DEBUG, PMD, "LACP: {\n"\ + " subtype= %02X\n"\ + " ver_num=%02X\n"\ + " actor={ tlv=%02X, len=%02X\n"\ + " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\ + " state={ %s }\n"\ + " }\n"\ + " partner={ tlv=%02X, len=%02X\n"\ + " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\ + " state={ %s }\n"\ + " }\n"\ + " collector={info=%02X, length=%02X, max_delay=%04X\n, " \ + "type_term=%02X, terminator_length = %02X}\n",\ + l->subtype,\ + l->version_number,\ + l->actor.tlv_type_info,\ + l->actor.info_length,\ + l->actor.port_params.system_priority,\ + a_address,\ + l->actor.port_params.key,\ + l->actor.port_params.port_priority,\ + l->actor.port_params.port_number,\ + a_state,\ + l->partner.tlv_type_info,\ + l->partner.info_length,\ + l->partner.port_params.system_priority,\ + p_address,\ + l->partner.port_params.key,\ + l->partner.port_params.port_priority,\ + l->partner.port_params.port_number,\ + p_state,\ + l->tlv_type_collector_info,\ + l->collector_info_length,\ + l->collector_max_delay,\ + l->tlv_type_terminator,\ + l->terminator_length); + +} +#define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu) +#else +#define BOND_PRINT_LACP(lacpdu) do { } while (0) +#define MODE4_DEBUG(fmt, ...) do { } while (0) +#endif + +static const struct ether_addr lacp_mac_addr = { + .addr_bytes = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 } +}; + +struct port mode_8023ad_ports[RTE_MAX_ETHPORTS]; + +static void +timer_cancel(uint64_t *timer) +{ + *timer = 0; +} + +static void +timer_set(uint64_t *timer, uint64_t timeout) +{ + *timer = rte_rdtsc() + timeout; +} + +/* Forces given timer to be in expired state. 
*/ +static void +timer_force_expired(uint64_t *timer) +{ + *timer = rte_rdtsc(); +} + +static bool +timer_is_stopped(uint64_t *timer) +{ + return *timer == 0; +} + +static bool +timer_is_expired(uint64_t *timer) +{ + return *timer < rte_rdtsc(); +} + +/* Timer is in running state if it is not stopped nor expired */ +static bool +timer_is_running(uint64_t *timer) +{ + return !timer_is_stopped(timer) && !timer_is_expired(timer); +} + +static void +set_warning_flags(struct port *port, uint16_t flags) +{ + int retval; + uint16_t old; + uint16_t new_flag = 0; + + do { + old = port->warnings_to_show; + new_flag = old | flags; + retval = rte_atomic16_cmpset(&port->warnings_to_show, old, new_flag); + } while (unlikely(retval == 0)); +} + +static void +show_warnings(uint8_t slave_id) +{ + struct port *port = &mode_8023ad_ports[slave_id]; + uint8_t warnings; + + do { + warnings = port->warnings_to_show; + } while (rte_atomic16_cmpset(&port->warnings_to_show, warnings, 0) == 0); + + if (!warnings) + return; + + if (!timer_is_expired(&port->warning_timer)) + return; + + + timer_set(&port->warning_timer, BOND_8023AD_WARNINGS_PERIOD_MS * + rte_get_tsc_hz() / 1000); + + if (warnings & WRN_RX_QUEUE_FULL) { + RTE_LOG(DEBUG, PMD, + "Slave %u: failed to enqueue LACP packet into RX ring.\n" + "Receive and transmit functions must be invoked on bonded\n" + "interface at least 10 times per second or LACP will not\n" + "work correctly\n", slave_id); + } + + if (warnings & WRN_TX_QUEUE_FULL) { + RTE_LOG(DEBUG, PMD, + "Slave %u: failed to enqueue LACP packet into TX ring.\n" + "Receive and transmit functions must be invoked on bonded\n" + "interface at least 10 times per second or LACP will not\n" + "work correctly\n", slave_id); + } + + if (warnings & WRN_RX_MARKER_TO_FAST) + RTE_LOG(INFO, PMD, "Slave %u: marker to early - ignoring.\n", slave_id); + + if (warnings & WRN_UNKNOWN_SLOW_TYPE) { + RTE_LOG(INFO, PMD, + "Slave %u: ignoring unknown slow protocol frame type", slave_id); + } + + if (warnings & WRN_UNKNOWN_MARKER_TYPE) + RTE_LOG(INFO, PMD, "Slave %u: ignoring unknown marker type", slave_id); + + if (warnings & WRN_NOT_LACP_CAPABLE) + MODE4_DEBUG("Port %u is not LACP capable!\n", slave_id); +} + +static void +record_default(struct port *port) +{ + /* Record default parameters for partner. Partner admin parameters + * are not implemented so set them to arbitrary default (last known) and + * mark actor that parner is in defaulted state. */ + port->partner_state = STATE_LACP_ACTIVE; + ACTOR_STATE_SET(port, DEFAULTED); +} + +/** Function handles rx state machine. + * + * This function implements Receive State Machine from point 5.4.12 in + * 802.1AX documentation. It should be called periodically. + * + * @param lacpdu LACPDU received. + * @param port Port on which LACPDU was received. 
+ */ +static void +rx_machine(struct bond_dev_private *internals, uint8_t slave_id, + struct lacpdu *lacp) +{ + struct port *agg, *port = &mode_8023ad_ports[slave_id]; + uint64_t timeout; + + if (SM_FLAG(port, BEGIN)) { + /* Initialize stuff */ + MODE4_DEBUG("-> INITIALIZE\n"); + SM_FLAG_CLR(port, MOVED); + port->selected = UNSELECTED; + + record_default(port); + + ACTOR_STATE_CLR(port, EXPIRED); + timer_cancel(&port->current_while_timer); + + /* DISABLED: On initialization partner is out of sync */ + PARTNER_STATE_CLR(port, SYNCHRONIZATION); + + /* LACP DISABLED stuff if LACP not enabled on this port */ + if (!SM_FLAG(port, LACP_ENABLED)) + PARTNER_STATE_CLR(port, AGGREGATION); + else + PARTNER_STATE_SET(port, AGGREGATION); + } + + if (!SM_FLAG(port, LACP_ENABLED)) { + /* Update parameters only if state changed */ + if (!timer_is_stopped(&port->current_while_timer)) { + port->selected = UNSELECTED; + record_default(port); + PARTNER_STATE_CLR(port, AGGREGATION); + ACTOR_STATE_CLR(port, EXPIRED); + timer_cancel(&port->current_while_timer); + } + return; + } + + if (lacp) { + MODE4_DEBUG("LACP -> CURRENT\n"); + BOND_PRINT_LACP(lacp); + /* Update selected flag. If partner parameters are defaulted assume they + * are match. If not defaulted compare LACP actor with ports parner + * params. */ + if (!ACTOR_STATE(port, DEFAULTED) && + (ACTOR_STATE(port, AGGREGATION) != PARTNER_STATE(port, AGGREGATION) + || memcmp(&port->partner, &lacp->actor.port_params, + sizeof(port->partner)) != 0)) { + MODE4_DEBUG("selected <- UNSELECTED\n"); + port->selected = UNSELECTED; + } + + /* Record this PDU actor params as partner params */ + memcpy(&port->partner, &lacp->actor.port_params, + sizeof(struct port_params)); + port->partner_state = lacp->actor.state; + + /* Partner parameters are not defaulted any more */ + ACTOR_STATE_CLR(port, DEFAULTED); + + /* If LACP partner params match this port actor params */ + agg = &mode_8023ad_ports[port->aggregator_port_id]; + bool match = port->actor.system_priority == + lacp->partner.port_params.system_priority && + is_same_ether_addr(&agg->actor.system, + &lacp->partner.port_params.system) && + port->actor.port_priority == + lacp->partner.port_params.port_priority && + port->actor.port_number == + lacp->partner.port_params.port_number; + + /* Update NTT if partners information are outdated (xored and masked + * bits are set)*/ + uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT | + STATE_SYNCHRONIZATION | STATE_AGGREGATION; + + if (((port->actor_state ^ lacp->partner.state) & state_mask) || + match == false) { + SM_FLAG_SET(port, NTT); + } + + /* If LACP partner params match this port actor params */ + if (match == true && ACTOR_STATE(port, AGGREGATION) == + PARTNER_STATE(port, AGGREGATION)) + PARTNER_STATE_SET(port, SYNCHRONIZATION); + else if (!PARTNER_STATE(port, AGGREGATION) && ACTOR_STATE(port, + AGGREGATION)) + PARTNER_STATE_SET(port, SYNCHRONIZATION); + else + PARTNER_STATE_CLR(port, SYNCHRONIZATION); + + if (ACTOR_STATE(port, LACP_SHORT_TIMEOUT)) + timeout = internals->mode4.short_timeout; + else + timeout = internals->mode4.long_timeout; + + timer_set(&port->current_while_timer, timeout); + ACTOR_STATE_CLR(port, EXPIRED); + return; /* No state change */ + } + + /* If CURRENT state timer is not running (stopped or expired) + * transit to EXPIRED state from DISABLED or CURRENT */ + if (!timer_is_running(&port->current_while_timer)) { + ACTOR_STATE_SET(port, EXPIRED); + PARTNER_STATE_CLR(port, SYNCHRONIZATION); + PARTNER_STATE_SET(port, 
LACP_SHORT_TIMEOUT); + timer_set(&port->current_while_timer, internals->mode4.short_timeout); + } +} + +/** + * Function handles periodic tx state machine. + * + * Function implements Periodic Transmission state machine from point 5.4.13 + * in 802.1AX documentation. It should be called periodically. + * + * @param port Port to handle state machine. + */ +static void +periodic_machine(struct bond_dev_private *internals, uint8_t slave_id) +{ + struct port *port = &mode_8023ad_ports[slave_id]; + /* Calculate if either site is LACP enabled */ + uint64_t timeout; + uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) || + PARTNER_STATE(port, LACP_ACTIVE); + + uint8_t is_partner_fast, was_partner_fast; + /* No periodic is on BEGIN, LACP DISABLE or when both sides are pasive */ + if (SM_FLAG(port, BEGIN) || !SM_FLAG(port, LACP_ENABLED) || !active) { + timer_cancel(&port->periodic_timer); + timer_force_expired(&port->tx_machine_timer); + SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT); + + MODE4_DEBUG("-> NO_PERIODIC ( %s%s%s)\n", + SM_FLAG(port, BEGIN) ? "begind " : "", + SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ", + active ? "LACP active " : "LACP pasive "); + return; + } + + is_partner_fast = PARTNER_STATE(port, LACP_SHORT_TIMEOUT); + was_partner_fast = SM_FLAG(port, PARTNER_SHORT_TIMEOUT); + + /* If periodic timer is not started, transit from NO PERIODIC to FAST/SLOW. + * Other case: check if timer expire or partners settings changed. */ + if (!timer_is_stopped(&port->periodic_timer)) { + if (timer_is_expired(&port->periodic_timer)) { + SM_FLAG_SET(port, NTT); + } else if (is_partner_fast != was_partner_fast) { + /* Partners timeout was slow and now it is fast -> send LACP. + * In other case (was fast and now it is slow) just switch + * timeout to slow without forcing send of LACP (because standard + * say so)*/ + if (!is_partner_fast) + SM_FLAG_SET(port, NTT); + } else + return; /* Nothing changed */ + } + + /* Handle state transition to FAST/SLOW LACP timeout */ + if (is_partner_fast) { + timeout = internals->mode4.fast_periodic_timeout; + SM_FLAG_SET(port, PARTNER_SHORT_TIMEOUT); + } else { + timeout = internals->mode4.slow_periodic_timeout; + SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT); + } + + timer_set(&port->periodic_timer, timeout); +} + +/** + * Function handles mux state machine. + * + * Function implements Mux Machine from point 5.4.15 in 802.1AX documentation. + * It should be called periodically. + * + * @param port Port to handle state machine. 
+ */ +static void +mux_machine(struct bond_dev_private *internals, uint8_t slave_id) +{ + struct port *port = &mode_8023ad_ports[slave_id]; + + /* Save current state for later use */ + const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING | + STATE_COLLECTING; + + /* Enter DETACHED state on BEGIN condition or from any other state if + * port was unselected */ + if (SM_FLAG(port, BEGIN) || + port->selected == UNSELECTED || (port->selected == STANDBY && + (port->actor_state & state_mask) != 0)) { + /* detach mux from aggregator */ + port->actor_state &= ~state_mask; + /* Set ntt to true if BEGIN condition or transition from any other state + * which is indicated that wait_while_timer was started */ + if (SM_FLAG(port, BEGIN) || + !timer_is_stopped(&port->wait_while_timer)) { + SM_FLAG_SET(port, NTT); + MODE4_DEBUG("-> DETACHED\n"); + } + timer_cancel(&port->wait_while_timer); + } + + if (timer_is_stopped(&port->wait_while_timer)) { + if (port->selected == SELECTED || port->selected == STANDBY) { + timer_set(&port->wait_while_timer, + internals->mode4.aggregate_wait_timeout); + + MODE4_DEBUG("DETACHED -> WAITING\n"); + } + /* Waiting state entered */ + return; + } + + /* Transit next state if port is ready */ + if (!timer_is_expired(&port->wait_while_timer)) + return; + + if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) && + !PARTNER_STATE(port, SYNCHRONIZATION)) { + /* If in COLLECTING or DISTRIBUTING state and partner becomes out of + * sync transit to ATACHED state. */ + ACTOR_STATE_CLR(port, DISTRIBUTING); + ACTOR_STATE_CLR(port, COLLECTING); + /* Clear actor sync to activate transit ATACHED in condition bellow */ + ACTOR_STATE_CLR(port, SYNCHRONIZATION); + MODE4_DEBUG("Out of sync -> ATTACHED\n"); + } + + if (!ACTOR_STATE(port, SYNCHRONIZATION)) { + /* attach mux to aggregator */ + RTE_VERIFY((port->actor_state & (STATE_COLLECTING | + STATE_DISTRIBUTING)) == 0); + + ACTOR_STATE_SET(port, SYNCHRONIZATION); + SM_FLAG_SET(port, NTT); + MODE4_DEBUG("ATTACHED Entered\n"); + } else if (!ACTOR_STATE(port, COLLECTING)) { + /* Start collecting if in sync */ + if (PARTNER_STATE(port, SYNCHRONIZATION)) { + MODE4_DEBUG("ATTACHED -> COLLECTING\n"); + ACTOR_STATE_SET(port, COLLECTING); + SM_FLAG_SET(port, NTT); + } + } else if (ACTOR_STATE(port, COLLECTING)) { + /* Check if partner is in COLLECTING state. If so this port can + * distribute frames to it */ + if (!ACTOR_STATE(port, DISTRIBUTING)) { + if (PARTNER_STATE(port, COLLECTING)) { + /* Enable DISTRIBUTING if partner is collecting */ + ACTOR_STATE_SET(port, DISTRIBUTING); + SM_FLAG_SET(port, NTT); + MODE4_DEBUG("COLLECTING -> DISTRIBUTING\n"); + RTE_LOG(INFO, PMD, + "Bond %u: slave id %u distributing started.\n", + internals->port_id, slave_id); + } + } else { + if (!PARTNER_STATE(port, COLLECTING)) { + /* Disable DISTRIBUTING (enter COLLECTING state) if partner + * is not collecting */ + ACTOR_STATE_CLR(port, DISTRIBUTING); + SM_FLAG_SET(port, NTT); + MODE4_DEBUG("DISTRIBUTING -> COLLECTING\n"); + RTE_LOG(INFO, PMD, + "Bond %u: slave id %u distributing stopped.\n", + internals->port_id, slave_id); + } + } + } +} + +/** + * Function handles transmit state machine. + * + * Function implements Transmit Machine from point 5.4.16 in 802.1AX + * documentation. 
+ * + * @param port + */ +static void +tx_machine(struct bond_dev_private *internals, uint8_t slave_id) +{ + struct port *agg, *port = &mode_8023ad_ports[slave_id]; + + struct rte_mbuf *lacp_pkt = NULL; + struct lacpdu_header *hdr; + struct lacpdu *lacpdu; + + /* If periodic timer is not running periodic machine is in NO PERIODIC and + * according to 802.3ax standard tx machine should not transmit any frames + * and set ntt to false. */ + if (timer_is_stopped(&port->periodic_timer)) + SM_FLAG_CLR(port, NTT); + + if (!SM_FLAG(port, NTT)) + return; + + if (!timer_is_expired(&port->tx_machine_timer)) + return; + + lacp_pkt = rte_pktmbuf_alloc(port->mbuf_pool); + if (lacp_pkt == NULL) { + RTE_LOG(ERR, PMD, "Failed to allocate LACP packet from pool\n"); + return; + } + + lacp_pkt->data_len = sizeof(*hdr); + lacp_pkt->pkt_len = sizeof(*hdr); + + hdr = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *); + + /* Source and destination MAC */ + ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr); + rte_eth_macaddr_get(slave_id, &hdr->eth_hdr.s_addr); + hdr->eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_SLOW); + + lacpdu = &hdr->lacpdu; + memset(lacpdu, 0, sizeof(*lacpdu)); + + /* Initialize LACP part */ + lacpdu->subtype = SLOW_SUBTYPE_LACP; + lacpdu->version_number = 1; + + /* ACTOR */ + lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION; + lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params); + memcpy(&hdr->lacpdu.actor.port_params, &port->actor, + sizeof(port->actor)); + agg = &mode_8023ad_ports[port->aggregator_port_id]; + ether_addr_copy(&agg->actor.system, &hdr->lacpdu.actor.port_params.system); + lacpdu->actor.state = port->actor_state; + + /* PARTNER */ + lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION; + lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params); + memcpy(&lacpdu->partner.port_params, &port->partner, + sizeof(struct port_params)); + lacpdu->partner.state = port->partner_state; + + /* Other fields */ + lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION; + lacpdu->collector_info_length = 0x10; + lacpdu->collector_max_delay = 0; + + lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION; + lacpdu->terminator_length = 0; + + if (rte_ring_enqueue(port->tx_ring, lacp_pkt) == -ENOBUFS) { + /* If TX ring full, drop packet and free message. Retransmission + * will happen in next function call. */ + rte_pktmbuf_free(lacp_pkt); + set_warning_flags(port, WRN_TX_QUEUE_FULL); + return; + } + + MODE4_DEBUG("sending LACP frame\n"); + BOND_PRINT_LACP(lacpdu); + + timer_set(&port->tx_machine_timer, internals->mode4.tx_period_timeout); + SM_FLAG_CLR(port, NTT); +} + +/** + * Function assigns port to aggregator. + * + * @param bond_dev_private Pointer to bond_dev_private structure. + * @param port_pos Port to assign. + */ +static void +selection_logic(struct bond_dev_private *internals, uint8_t slave_id) +{ + struct port *agg, *port; + uint8_t slaves_count, new_agg_id, i; + uint8_t *slaves; + + slaves = internals->active_slaves; + slaves_count = internals->active_slave_count; + port = &mode_8023ad_ports[slave_id]; + + /* Search for aggregator suitable for this port */ + for (i = 0; i < slaves_count; ++i) { + agg = &mode_8023ad_ports[slaves[i]]; + /* Skip ports that are not aggreagators */ + if (agg->aggregator_port_id != slaves[i]) + continue; + + /* Actors system ID is not checked since all slave device have the same + * ID (MAC address). 
*/ + if ((agg->actor.key == port->actor.key && + agg->partner.system_priority == port->partner.system_priority && + is_same_ether_addr(&agg->partner.system, &port->partner.system) == 1 + && (agg->partner.key == port->partner.key)) && + is_zero_ether_addr(&port->partner.system) != 1 && + (agg->actor.key & + rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) != 0) { + + break; + } + } + + /* By default, port uses it self as agregator */ + if (i == slaves_count) + new_agg_id = slave_id; + else + new_agg_id = slaves[i]; + + if (new_agg_id != port->aggregator_port_id) { + port->aggregator_port_id = new_agg_id; + + MODE4_DEBUG("-> SELECTED: ID=%3u\n" + "\t%s aggregator ID=%3u\n", + port->aggregator_port_id, + port->aggregator_port_id == slave_id ? + "aggregator not found, using default" : "aggregator found", + port->aggregator_port_id); + } + + port->selected = SELECTED; +} + +/* Function maps DPDK speed to bonding speed stored in key field */ +static uint16_t +link_speed_key(uint16_t speed) { + uint16_t key_speed; + + switch (speed) { + case ETH_LINK_SPEED_AUTONEG: + key_speed = 0x00; + break; + case ETH_LINK_SPEED_10: + key_speed = BOND_LINK_SPEED_KEY_10M; + break; + case ETH_LINK_SPEED_100: + key_speed = BOND_LINK_SPEED_KEY_100M; + break; + case ETH_LINK_SPEED_1000: + key_speed = BOND_LINK_SPEED_KEY_1000M; + break; + case ETH_LINK_SPEED_10G: + key_speed = BOND_LINK_SPEED_KEY_10G; + break; + case ETH_LINK_SPEED_20G: + key_speed = BOND_LINK_SPEED_KEY_20G; + break; + case ETH_LINK_SPEED_40G: + key_speed = BOND_LINK_SPEED_KEY_40G; + break; + default: + /* Unknown speed*/ + key_speed = 0xFFFF; + } + + return key_speed; +} + +static void +bond_mode_8023ad_periodic_cb(void *arg) +{ + struct rte_eth_dev *bond_dev = arg; + struct bond_dev_private *internals = bond_dev->data->dev_private; + struct port *port; + struct rte_eth_link link_info; + struct ether_addr slave_addr; + + void *pkt = NULL; + uint8_t i, slave_id; + + + /* Update link status on each port */ + for (i = 0; i < internals->active_slave_count; i++) { + uint16_t key; + + slave_id = internals->active_slaves[i]; + rte_eth_link_get(slave_id, &link_info); + rte_eth_macaddr_get(slave_id, &slave_addr); + + if (link_info.link_status != 0) { + key = link_speed_key(link_info.link_speed) << 1; + if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX) + key |= BOND_LINK_FULL_DUPLEX_KEY; + } else + key = 0; + + port = &mode_8023ad_ports[slave_id]; + + key = rte_cpu_to_be_16(key); + if (key != port->actor.key) { + if (!(key & rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY))) + set_warning_flags(port, WRN_NOT_LACP_CAPABLE); + + port->actor.key = key; + SM_FLAG_SET(port, NTT); + } + + if (!is_same_ether_addr(&port->actor.system, &slave_addr)) { + ether_addr_copy(&slave_addr, &port->actor.system); + if (port->aggregator_port_id == slave_id) + SM_FLAG_SET(port, NTT); + } + } + + for (i = 0; i < internals->active_slave_count; i++) { + slave_id = internals->active_slaves[i]; + port = &mode_8023ad_ports[slave_id]; + + if ((port->actor.key & + rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) { + + SM_FLAG_SET(port, BEGIN); + + /* LACP is disabled on half duples or link is down */ + if (SM_FLAG(port, LACP_ENABLED)) { + /* If port was enabled set it to BEGIN state */ + SM_FLAG_CLR(port, LACP_ENABLED); + ACTOR_STATE_CLR(port, DISTRIBUTING); + ACTOR_STATE_CLR(port, COLLECTING); + } + + /* Skip this port processing */ + continue; + } + + SM_FLAG_SET(port, LACP_ENABLED); + + /* Find LACP packet to this port. 
Do not check subtype, it is done in + * function that queued packet */ + if (rte_ring_dequeue(port->rx_ring, &pkt) == 0) { + struct rte_mbuf *lacp_pkt = pkt; + struct lacpdu_header *lacp; + + lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *); + RTE_VERIFY(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP); + + /* This is LACP frame so pass it to rx_machine */ + rx_machine(internals, slave_id, &lacp->lacpdu); + rte_pktmbuf_free(lacp_pkt); + } else + rx_machine(internals, slave_id, NULL); + + periodic_machine(internals, slave_id); + mux_machine(internals, slave_id); + tx_machine(internals, slave_id); + selection_logic(internals, slave_id); + + SM_FLAG_CLR(port, BEGIN); + show_warnings(slave_id); + } + + rte_eal_alarm_set(internals->mode4.update_timeout_us, + bond_mode_8023ad_periodic_cb, arg); +} + +void +bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, uint8_t slave_id) +{ + struct bond_dev_private *internals = bond_dev->data->dev_private; + + struct port *port = &mode_8023ad_ports[slave_id]; + struct port_params initial = { + .system = { { 0 } }, + .system_priority = rte_cpu_to_be_16(0xFFFF), + .key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY), + .port_priority = rte_cpu_to_be_16(0x00FF), + .port_number = 0, + }; + + char mem_name[RTE_ETH_NAME_MAX_LEN]; + uint8_t socket_id; + unsigned element_size; + + /* Given slave mus not be in active list */ + RTE_VERIFY(find_slave_by_id(internals->active_slaves, + internals->active_slave_count, slave_id) == internals->active_slave_count); + + memcpy(&port->actor, &initial, sizeof(struct port_params)); + /* Standard requires that port ID must be grater than 0. + * Add 1 do get corresponding port_number */ + port->actor.port_number = rte_cpu_to_be_16((uint16_t)slave_id + 1); + + memcpy(&port->partner, &initial, sizeof(struct port_params)); + + /* default states */ + port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED; + port->partner_state = STATE_LACP_ACTIVE; + port->sm_flags = SM_FLAGS_BEGIN; + + /* use this port as agregator */ + port->aggregator_port_id = slave_id; + rte_eth_promiscuous_enable(slave_id); + + timer_cancel(&port->warning_timer); + + if (port->mbuf_pool != NULL) + return; + + RTE_VERIFY(port->rx_ring == NULL); + RTE_VERIFY(port->tx_ring == NULL); + socket_id = rte_eth_devices[slave_id].pci_dev->numa_node; + + element_size = sizeof(struct slow_protocol_frame) + sizeof(struct rte_mbuf) + + RTE_PKTMBUF_HEADROOM; + + /* How big memory pool should be? If driver will not + * free packets quick enough there will be ENOMEM in tx_machine. + * For now give 511 pkts * max number of queued TX packets per slave. + * Hope it will be enough. */ + snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_pool", slave_id); + port->mbuf_pool = rte_mempool_create(mem_name, + BOND_MODE_8023AX_SLAVE_TX_PKTS * 512 - 1, + element_size, + RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ? 32 : RTE_MEMPOOL_CACHE_MAX_SIZE, + sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, + NULL, rte_pktmbuf_init, NULL, socket_id, MEMPOOL_F_NO_SPREAD); + + /* Any memory allocation failure in initalization is critical because + * resources can't be free, so reinitialization is impossible. 
*/ + if (port->mbuf_pool == NULL) { + rte_panic("Slave %u: Failed to create memory pool '%s': %s\n", + slave_id, mem_name, rte_strerror(rte_errno)); + } + + snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_rx", slave_id); + port->rx_ring = rte_ring_create(mem_name, + rte_align32pow2(BOND_MODE_8023AX_SLAVE_RX_PKTS), socket_id, 0); + + if (port->rx_ring == NULL) { + rte_panic("Slave %u: Failed to create rx ring '%s': %s\n", slave_id, + mem_name, rte_strerror(rte_errno)); + } + + /* TX ring is at least one pkt longer to make room for marker packet. */ + snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_tx", slave_id); + port->tx_ring = rte_ring_create(mem_name, + rte_align32pow2(BOND_MODE_8023AX_SLAVE_TX_PKTS + 1), socket_id, 0); + + if (port->tx_ring == NULL) { + rte_panic("Slave %u: Failed to create tx ring '%s': %s\n", slave_id, + mem_name, rte_strerror(rte_errno)); + } +} + +int +bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev, + uint8_t slave_id) +{ + struct bond_dev_private *internals = bond_dev->data->dev_private; + void *pkt = NULL; + struct port *port; + uint8_t i; + + /* Given slave mus be in active list */ + RTE_VERIFY(find_slave_by_id(internals->active_slaves, + internals->active_slave_count, slave_id) < internals->active_slave_count); + + /* Exclude slave from transmit policy. If this slave is an aggregator + * make all aggregated slaves unselected to force sellection logic + * to select suitable aggregator for this port. */ + for (i = 0; i < internals->active_slave_count; i++) { + port = &mode_8023ad_ports[internals->active_slaves[i]]; + if (port->aggregator_port_id != slave_id) + continue; + + port->selected = UNSELECTED; + + /* Use default aggregator */ + port->aggregator_port_id = internals->active_slaves[i]; + } + + port = &mode_8023ad_ports[slave_id]; + port->selected = UNSELECTED; + port->actor_state &= ~(STATE_SYNCHRONIZATION | STATE_DISTRIBUTING | + STATE_COLLECTING); + + while (rte_ring_dequeue(port->rx_ring, &pkt) == 0) + rte_pktmbuf_free((struct rte_mbuf *)pkt); + + while (rte_ring_dequeue(port->tx_ring, &pkt) == 0) + rte_pktmbuf_free((struct rte_mbuf *)pkt); + return 0; +} + +void +bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev) +{ + struct bond_dev_private *internals = bond_dev->data->dev_private; + struct ether_addr slave_addr; + struct port *slave, *agg_slave; + uint8_t slave_id, i, j; + + bond_mode_8023ad_stop(bond_dev); + + for (i = 0; i < internals->active_slave_count; i++) { + slave_id = internals->active_slaves[i]; + slave = &mode_8023ad_ports[slave_id]; + rte_eth_macaddr_get(slave_id, &slave_addr); + + if (is_same_ether_addr(&slave_addr, &slave->actor.system)) + continue; + + ether_addr_copy(&slave_addr, &slave->actor.system); + /* Do nothing if this port is not an aggregator. In other case + * Set NTT flag on every port that use this aggregator. 
*/ + if (slave->aggregator_port_id != slave_id) + continue; + + for (j = 0; j < internals->active_slave_count; j++) { + agg_slave = &mode_8023ad_ports[internals->active_slaves[j]]; + if (agg_slave->aggregator_port_id == slave_id) + SM_FLAG_SET(agg_slave, NTT); + } + } + + if (bond_dev->data->dev_started) + bond_mode_8023ad_start(bond_dev); +} + +void +bond_mode_8023ad_conf_get(struct rte_eth_dev *dev, + struct rte_eth_bond_8023ad_conf *conf) +{ + struct bond_dev_private *internals = dev->data->dev_private; + struct mode8023ad_private *mode4 = &internals->mode4; + uint64_t ms_ticks = rte_get_tsc_hz() / 1000; + + conf->fast_periodic_ms = mode4->fast_periodic_timeout / ms_ticks; + conf->slow_periodic_ms = mode4->slow_periodic_timeout / ms_ticks; + conf->short_timeout_ms = mode4->short_timeout / ms_ticks; + conf->long_timeout_ms = mode4->long_timeout / ms_ticks; + conf->aggregate_wait_timeout_ms = mode4->aggregate_wait_timeout / ms_ticks; + conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks; + conf->update_timeout_ms = mode4->update_timeout_us / 1000; +} + +void +bond_mode_8023ad_setup(struct rte_eth_dev *dev, + struct rte_eth_bond_8023ad_conf *conf) +{ + struct rte_eth_bond_8023ad_conf def_conf; + struct bond_dev_private *internals = dev->data->dev_private; + struct mode8023ad_private *mode4 = &internals->mode4; + uint64_t ms_ticks = rte_get_tsc_hz() / 1000; + + if (conf == NULL) { + conf = &def_conf; + conf->fast_periodic_ms = BOND_8023AD_FAST_PERIODIC_MS; + conf->slow_periodic_ms = BOND_8023AD_SLOW_PERIODIC_MS; + conf->short_timeout_ms = BOND_8023AD_SHORT_TIMEOUT_MS; + conf->long_timeout_ms = BOND_8023AD_LONG_TIMEOUT_MS; + conf->aggregate_wait_timeout_ms = BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS; + conf->tx_period_ms = BOND_8023AD_TX_MACHINE_PERIOD_MS; + conf->rx_marker_period_ms = BOND_8023AD_RX_MARKER_PERIOD_MS; + conf->update_timeout_ms = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS; + } + + mode4->fast_periodic_timeout = conf->fast_periodic_ms * ms_ticks; + mode4->slow_periodic_timeout = conf->slow_periodic_ms * ms_ticks; + mode4->short_timeout = conf->short_timeout_ms * ms_ticks; + mode4->long_timeout = conf->long_timeout_ms * ms_ticks; + mode4->aggregate_wait_timeout = conf->aggregate_wait_timeout_ms * ms_ticks; + mode4->tx_period_timeout = conf->tx_period_ms * ms_ticks; + mode4->rx_marker_timeout = conf->rx_marker_period_ms * ms_ticks; + mode4->update_timeout_us = conf->update_timeout_ms * 1000; +} + +int +bond_mode_8023ad_enable(struct rte_eth_dev *bond_dev) +{ + struct bond_dev_private *internals = bond_dev->data->dev_private; + uint8_t i; + + for (i = 0; i < internals->active_slave_count; i++) + bond_mode_8023ad_activate_slave(bond_dev, i); + + return 0; +} + +int +bond_mode_8023ad_start(struct rte_eth_dev *bond_dev) +{ + return rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000, + &bond_mode_8023ad_periodic_cb, bond_dev); +} + +void +bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev) +{ + rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev); +} + +void +bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals, + uint8_t slave_id, struct rte_mbuf *pkt) +{ + struct mode8023ad_private *mode4 = &internals->mode4; + struct port *port = &mode_8023ad_ports[slave_id]; + struct marker_header *m_hdr; + uint64_t marker_timer, old_marker_timer; + int retval; + uint8_t wrn, subtype; + /* If packet is a marker, we send response now by reusing given packet + * and update only source MAC, destination MAC is multicast so don't + * update it. 
Other frames will be handled later by state machines */ + subtype = rte_pktmbuf_mtod(pkt, + struct slow_protocol_frame *)->slow_protocol.subtype; + + if (subtype == SLOW_SUBTYPE_MARKER) { + m_hdr = rte_pktmbuf_mtod(pkt, struct marker_header *); + + if (likely(m_hdr->marker.tlv_type_marker != MARKER_TLV_TYPE_INFO)) { + wrn = WRN_UNKNOWN_MARKER_TYPE; + goto free_out; + } + + /* Setup marker timer. Do it in loop in case concurent access. */ + do { + old_marker_timer = port->rx_marker_timer; + if (!timer_is_expired(&old_marker_timer)) { + wrn = WRN_RX_MARKER_TO_FAST; + goto free_out; + } + + timer_set(&marker_timer, mode4->rx_marker_timeout); + retval = rte_atomic64_cmpset(&port->rx_marker_timer, + old_marker_timer, marker_timer); + } while (unlikely(retval == 0)); + + m_hdr->marker.tlv_type_marker = MARKER_TLV_TYPE_RESP; + rte_eth_macaddr_get(slave_id, &m_hdr->eth_hdr.s_addr); + + if (unlikely(rte_ring_enqueue(port->tx_ring, pkt) == -ENOBUFS)) { + /* reset timer */ + port->rx_marker_timer = 0; + wrn = WRN_TX_QUEUE_FULL; + goto free_out; + } + } else if (likely(subtype == SLOW_SUBTYPE_LACP)) { + if (unlikely(rte_ring_enqueue(port->rx_ring, pkt) == -ENOBUFS)) { + /* If RX fing full free lacpdu message and drop packet */ + wrn = WRN_RX_QUEUE_FULL; + goto free_out; + } + } else { + wrn = WRN_UNKNOWN_SLOW_TYPE; + goto free_out; + } + + return; + +free_out: + set_warning_flags(port, wrn); + rte_pktmbuf_free(pkt); +} + +int +rte_eth_bond_8023ad_conf_get(uint8_t port_id, + struct rte_eth_bond_8023ad_conf *conf) +{ + struct rte_eth_dev *bond_dev; + + if (valid_bonded_port_id(port_id) != 0) + return -EINVAL; + + if (conf == NULL) + return -EINVAL; + + bond_dev = &rte_eth_devices[port_id]; + bond_mode_8023ad_conf_get(bond_dev, conf); + return 0; +} + +int +rte_eth_bond_8023ad_setup(uint8_t port_id, + struct rte_eth_bond_8023ad_conf *conf) +{ + struct rte_eth_dev *bond_dev; + + if (valid_bonded_port_id(port_id) != 0) + return -EINVAL; + + if (conf != NULL) { + /* Basic sanity check */ + if (conf->slow_periodic_ms == 0 || + conf->fast_periodic_ms >= conf->slow_periodic_ms || + conf->long_timeout_ms == 0 || + conf->short_timeout_ms >= conf->long_timeout_ms || + conf->aggregate_wait_timeout_ms == 0 || + conf->tx_period_ms == 0 || + conf->rx_marker_period_ms == 0 || + conf->update_timeout_ms == 0) { + RTE_LOG(ERR, PMD, "given mode 4 configuration is invalid\n"); + return -EINVAL; + } + } + + bond_dev = &rte_eth_devices[port_id]; + bond_mode_8023ad_setup(bond_dev, conf); + + return 0; +} + +int +rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id, + struct rte_eth_bond_8023ad_slave_info *info) +{ + struct rte_eth_dev *bond_dev; + struct bond_dev_private *internals; + struct port *port; + + if (info == NULL || valid_bonded_port_id(port_id) != 0 || + rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD) + return -EINVAL; + + bond_dev = &rte_eth_devices[port_id]; + + internals = bond_dev->data->dev_private; + if (find_slave_by_id(internals->active_slaves, + internals->active_slave_count, slave_id) == + internals->active_slave_count) + return -EINVAL; + + port = &mode_8023ad_ports[slave_id]; + info->selected = port->selected; + + info->actor_state = port->actor_state; + rte_memcpy(&info->actor, &port->actor, sizeof(port->actor)); + + info->partner_state = port->partner_state; + rte_memcpy(&info->partner, &port->partner, sizeof(port->partner)); + + info->agg_port_id = port->aggregator_port_id; + return 0; +} diff --git a/drivers/bonding/rte_eth_bond_8023ad.h b/drivers/bonding/rte_eth_bond_8023ad.h 
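For illustration only, a rough sketch of calling the rte_eth_bond_8023ad_slave_info() query implemented above from an application. The helper name dump_slave_state is hypothetical, the port ids are assumed valid, and the STATE_* flags are the actor/partner state bits defined in rte_eth_bond_8023ad.h.

	#include <stdio.h>
	#include <rte_eth_bond_8023ad.h>

	/* Illustrative only: print a slave's mode 4 state. */
	static void
	dump_slave_state(uint8_t bond_port_id, uint8_t slave_id)
	{
		struct rte_eth_bond_8023ad_slave_info info;

		if (rte_eth_bond_8023ad_slave_info(bond_port_id, slave_id, &info) != 0)
			return;

		/* actor_state/partner_state are bitmasks of the STATE_* flags */
		printf("slave %u: aggregator %u, collecting %d, distributing %d\n",
			slave_id, info.agg_port_id,
			(info.actor_state & STATE_COLLECTING) != 0,
			(info.actor_state & STATE_DISTRIBUTING) != 0);
	}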
new file mode 100644 index 0000000..ebd0e93 --- /dev/null +++ b/drivers/bonding/rte_eth_bond_8023ad.h @@ -0,0 +1,222 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RTE_ETH_BOND_8023AD_H_ +#define RTE_ETH_BOND_8023AD_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Actor/partner states + */ +#define STATE_LACP_ACTIVE 0x01 +#define STATE_LACP_SHORT_TIMEOUT 0x02 +#define STATE_AGGREGATION 0x04 +#define STATE_SYNCHRONIZATION 0x08 +#define STATE_COLLECTING 0x10 +#define STATE_DISTRIBUTING 0x20 +/** Partners parameters are defaulted */ +#define STATE_DEFAULTED 0x40 +#define STATE_EXPIRED 0x80 + +#define TLV_TYPE_ACTOR_INFORMATION 0x01 +#define TLV_TYPE_PARTNER_INFORMATION 0x02 +#define TLV_TYPE_COLLECTOR_INFORMATION 0x03 +#define TLV_TYPE_TERMINATOR_INFORMATION 0x00 + +#define SLOW_SUBTYPE_LACP 0x01 +#define SLOW_SUBTYPE_MARKER 0x02 + +#define MARKER_TLV_TYPE_INFO 0x01 +#define MARKER_TLV_TYPE_RESP 0x02 + +enum rte_bond_8023ad_selection { + UNSELECTED, + STANDBY, + SELECTED +}; + +/** Generic slow protocol structure */ +struct slow_protocol { + uint8_t subtype; + uint8_t reserved_119[119]; +} __attribute__((__packed__)); + +/** Generic slow protocol frame type structure */ +struct slow_protocol_frame { + struct ether_hdr eth_hdr; + struct slow_protocol slow_protocol; +} __attribute__((__packed__)); + +struct port_params { + uint16_t system_priority; + /**< System priority (unused in current implementation) */ + struct ether_addr system; + /**< System ID - Slave MAC address, same as bonding MAC address */ + uint16_t key; + /**< Speed information (implementation dependednt) and duplex. */ + uint16_t port_priority; + /**< Priority of this (unused in current implementation) */ + uint16_t port_number; + /**< Port number. It corresponds to slave port id. 
*/ +} __attribute__((__packed__)); + +struct lacpdu_actor_partner_params { + uint8_t tlv_type_info; + uint8_t info_length; + struct port_params port_params; + uint8_t state; + uint8_t reserved_3[3]; +} __attribute__((__packed__)); + +/** LACPDU structure (5.4.2 in 802.1AX documentation). */ +struct lacpdu { + uint8_t subtype; + uint8_t version_number; + + struct lacpdu_actor_partner_params actor; + struct lacpdu_actor_partner_params partner; + + uint8_t tlv_type_collector_info; + uint8_t collector_info_length; + uint16_t collector_max_delay; + uint8_t reserved_12[12]; + + uint8_t tlv_type_terminator; + uint8_t terminator_length; + uint8_t reserved_50[50]; +} __attribute__((__packed__)); + +/** LACPDU frame: Contains ethernet header and LACPDU. */ +struct lacpdu_header { + struct ether_hdr eth_hdr; + struct lacpdu lacpdu; +} __attribute__((__packed__)); + +struct marker { + uint8_t subtype; + uint8_t version_number; + + uint8_t tlv_type_marker; + uint8_t info_length; + uint16_t requester_port; + struct ether_addr requester_system; + uint32_t requester_transaction_id; + uint8_t reserved_2[2]; + + uint8_t tlv_type_terminator; + uint8_t terminator_length; + uint8_t reserved_90[90]; +} __attribute__((__packed__)); + +struct marker_header { + struct ether_hdr eth_hdr; + struct marker marker; +} __attribute__((__packed__)); + +struct rte_eth_bond_8023ad_conf { + uint32_t fast_periodic_ms; + uint32_t slow_periodic_ms; + uint32_t short_timeout_ms; + uint32_t long_timeout_ms; + uint32_t aggregate_wait_timeout_ms; + uint32_t tx_period_ms; + uint32_t rx_marker_period_ms; + uint32_t update_timeout_ms; +}; + +struct rte_eth_bond_8023ad_slave_info { + enum rte_bond_8023ad_selection selected; + uint8_t actor_state; + struct port_params actor; + uint8_t partner_state; + struct port_params partner; + uint8_t agg_port_id; +}; + +/** + * @internal + * + * Function returns current configuration of 802.3AX mode. + * + * @param port_id Bonding device id + * @param conf Pointer to timeout structure. + * + * @return + * 0 - if ok + * -EINVAL if conf is NULL + */ +int +rte_eth_bond_8023ad_conf_get(uint8_t port_id, + struct rte_eth_bond_8023ad_conf *conf); + +/** + * @internal + * + * Function sets new configuration of 802.3AX mode. + * + * @param port_id Bonding device id + * @param conf Configuration, if NULL set default configuration. + * @return + * 0 - if ok + * -EINVAL if configuration is invalid. + */ +int +rte_eth_bond_8023ad_setup(uint8_t port_id, + struct rte_eth_bond_8023ad_conf *conf); + +/** + * @internal + * + * Function returns current state of given slave device. + * + * @param slave_id Port id of valid slave. + * @param conf Buffer for slave info. + * @return + * 0 - if ok + * -EINVAL if conf is NULL or slave id is invalid (not an active slave of + * the given bonded device). + */ +int +rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id, + struct rte_eth_bond_8023ad_slave_info *conf); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_ETH_BOND_8023AD_H_ */ diff --git a/drivers/bonding/rte_eth_bond_8023ad_private.h b/drivers/bonding/rte_eth_bond_8023ad_private.h new file mode 100644 index 0000000..8adee70 --- /dev/null +++ b/drivers/bonding/rte_eth_bond_8023ad_private.h @@ -0,0 +1,308 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RTE_ETH_BOND_8023AD_PRIVATE_H_ +#define RTE_ETH_BOND_8023AD_PRIVATE_H_ + +#include + +#include +#include +#include + +#include "rte_eth_bond_8023ad.h" + +#define BOND_MODE_8023AX_UPDATE_TIMEOUT_MS 100 +/** Maximum number of packets to one slave queued in TX ring. */ +#define BOND_MODE_8023AX_SLAVE_RX_PKTS 3 +/** Maximum number of LACP packets from one slave queued in TX ring. */ +#define BOND_MODE_8023AX_SLAVE_TX_PKTS 1 +/** + * Timeout definitions (5.4.4 in 802.1AX documentation). + */ +#define BOND_8023AD_FAST_PERIODIC_MS 900 +#define BOND_8023AD_SLOW_PERIODIC_MS 29000 +#define BOND_8023AD_SHORT_TIMEOUT_MS 3000 +#define BOND_8023AD_LONG_TIMEOUT_MS 90000 +#define BOND_8023AD_CHURN_DETECTION_TIMEOUT_MS 60000 +#define BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS 2000 +#define BOND_8023AD_TX_MACHINE_PERIOD_MS 500 +#define BOND_8023AD_RX_MARKER_PERIOD_MS 2000 + +/** + * Interval of showing warning messages from state machines. All messages will + * be held (and gathered together) to prevent flooding. + * This is not part of the 802.1AX standard.
+ */ +#define BOND_8023AD_WARNINGS_PERIOD_MS 1000 + + + +/** + * State machine flags + */ +#define SM_FLAGS_BEGIN 0x0001 +#define SM_FLAGS_LACP_ENABLED 0x0002 +#define SM_FLAGS_ACTOR_CHURN 0x0004 +#define SM_FLAGS_PARTNER_CHURN 0x0008 +#define SM_FLAGS_MOVED 0x0100 +#define SM_FLAGS_PARTNER_SHORT_TIMEOUT 0x0200 +#define SM_FLAGS_NTT 0x0400 + +#define BOND_LINK_FULL_DUPLEX_KEY 0x01 +#define BOND_LINK_SPEED_KEY_10M 0x02 +#define BOND_LINK_SPEED_KEY_100M 0x04 +#define BOND_LINK_SPEED_KEY_1000M 0x08 +#define BOND_LINK_SPEED_KEY_10G 0x10 +#define BOND_LINK_SPEED_KEY_20G 0x11 +#define BOND_LINK_SPEED_KEY_40G 0x12 + +#define WRN_RX_MARKER_TO_FAST 0x01 +#define WRN_UNKNOWN_SLOW_TYPE 0x02 +#define WRN_UNKNOWN_MARKER_TYPE 0x04 +#define WRN_NOT_LACP_CAPABLE 0x08 +#define WRN_RX_QUEUE_FULL 0x10 +#define WRN_TX_QUEUE_FULL 0x20 + +#define CHECK_FLAGS(_variable, _f) ((_variable) & (_f)) +#define SET_FLAGS(_variable, _f) ((_variable) |= (_f)) +#define CLEAR_FLAGS(_variable, _f) ((_variable) &= ~(_f)) + +#define SM_FLAG(_p, _f) (!!CHECK_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f)) +#define SM_FLAG_SET(_p, _f) SET_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f) +#define SM_FLAG_CLR(_p, _f) CLEAR_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f) + +#define ACTOR_STATE(_p, _f) (!!CHECK_FLAGS((_p)->actor_state, STATE_ ## _f)) +#define ACTOR_STATE_SET(_p, _f) SET_FLAGS((_p)->actor_state, STATE_ ## _f) +#define ACTOR_STATE_CLR(_p, _f) CLEAR_FLAGS((_p)->actor_state, STATE_ ## _f) + +#define PARTNER_STATE(_p, _f) (!!CHECK_FLAGS((_p)->partner_state, STATE_ ## _f)) +#define PARTNER_STATE_SET(_p, _f) SET_FLAGS((_p)->partner_state, STATE_ ## _f) +#define PARTNER_STATE_CLR(_p, _f) CLEAR_FLAGS((_p)->partner_state, STATE_ ## _f) + +/** Variables associated with each port (5.4.7 in 802.1AX documentation). */ +struct port { + /** + * The operational values of the Actor's state parameters. Bitmask + * of port states. + */ + uint8_t actor_state; + + /** The operational Actor's port parameters */ + struct port_params actor; + + /** + * The operational value of the Actor's view of the current values of + * the Partner's state parameters. The Actor sets this variable either + * to the value received from the Partner in an LACPDU, or to the value + * of Partner_Admin_Port_State. Bitmask of port states. + */ + uint8_t partner_state; + + /** The operational Partner's port parameters */ + struct port_params partner; + + /* Additional port parameters not listed in documentation */ + /** State machine flags */ + uint16_t sm_flags; + enum rte_bond_8023ad_selection selected; + + uint64_t current_while_timer; + uint64_t periodic_timer; + uint64_t wait_while_timer; + uint64_t tx_machine_timer; + uint64_t tx_marker_timer; + /* Agregator parameters */ + /** Used aggregator port ID */ + uint16_t aggregator_port_id; + + /** Memory pool used to allocate rings */ + struct rte_mempool *mbuf_pool; + + /** Ring of LACP packets from RX burst function */ + struct rte_ring *rx_ring; + + /** Ring of slow protocol packets (LACP and MARKERS) to TX burst function */ + struct rte_ring *tx_ring; + + /** Timer which is also used as mutex. If is 0 (not running) RX marker + * packet might be responded. Otherwise shall be dropped. It is zeroed in + * mode 4 callback function after expire. 
*/ + volatile uint64_t rx_marker_timer; + + uint64_t warning_timer; + volatile uint16_t warnings_to_show; +}; + +struct mode8023ad_private { + uint64_t fast_periodic_timeout; + uint64_t slow_periodic_timeout; + uint64_t short_timeout; + uint64_t long_timeout; + uint64_t aggregate_wait_timeout; + uint64_t tx_period_timeout; + uint64_t rx_marker_timeout; + uint64_t update_timeout_us; +}; + +/** + * @internal + * The pool of *port* structures. The size of the pool + * is configured at compile-time in the file. + */ +extern struct port mode_8023ad_ports[]; + +/* Forward declaration */ +struct bond_dev_private; + +/** + * @internal + * + * Get configuration of bonded interface. + * + * + * @param dev Bonded interface + * @param conf returned configuration + */ +void +bond_mode_8023ad_conf_get(struct rte_eth_dev *dev, + struct rte_eth_bond_8023ad_conf *conf); + +/** + * @internal + * + * Set mode 4 configuration of bonded interface. + * + * @pre Bonded interface must be stopped. + * + * @param dev Bonded interface + * @param conf new configuration. If NULL set default configuration. + */ +void +bond_mode_8023ad_setup(struct rte_eth_dev *dev, + struct rte_eth_bond_8023ad_conf *conf); + +/** + * @internal + * + * Enables 802.1AX mode and all active slaves on bonded interface. + * + * @param dev Bonded interface + * @return + * 0 on success, negative value otherwise. + */ +int +bond_mode_8023ad_enable(struct rte_eth_dev *dev); + +/** + * @internal + * + * Disables 802.1AX mode of the bonded interface and slaves. + * + * @param dev Bonded interface + * @return + * 0 on success, negative value otherwise. + */ +int bond_mode_8023ad_disable(struct rte_eth_dev *dev); + +/** + * @internal + * + * Starts 802.3AX state machines management logic. + * @param dev Bonded interface + * @return + * 0 if state machines were started, 1 if they were already running, + * negative value otherwise. + */ +int +bond_mode_8023ad_start(struct rte_eth_dev *dev); + +/** + * @internal + * + * Stops 802.3AX state machines management logic. + * @param dev Bonded interface + * @return + * 0 if this call stopped state machines, -ENOENT if alarm was not set. + */ +void +bond_mode_8023ad_stop(struct rte_eth_dev *dev); + +/** + * @internal + * + * Passes given slow packet to state machines management logic. + * @param internals Bonded device private data. + * @param slave_id Slave port id. + * @param pkt Slow packet. + */ +void +bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals, + uint8_t slave_id, struct rte_mbuf *pkt); + +/** + * @internal + * + * Adds given slave to 802.1AX mode and activates it on the bonded interface. + * + * @param dev Bonded interface. + * @param port_id Slave port ID to be added + * + * @return + * 0 on success, negative value otherwise. + */ +void +bond_mode_8023ad_activate_slave(struct rte_eth_dev *dev, uint8_t port_id); + +/** + * @internal + * + * Deinitializes and removes given slave from 802.1AX mode. + * + * @param dev Bonded interface. + * @param slave_pos Position of slave in active_slaves array + * + * @return + * 0 on success, negative value otherwise. + */ +int +bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *dev, uint8_t slave_pos); + +/** + * Updates state when MAC was changed on bonded device or one of its slaves.
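+ * (Sets the NTT flag on every active port that uses the affected aggregator so
+ * that updated LACPDUs are transmitted, and restarts the periodic callback if
+ * the device is started.)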
+ * @param bond_dev Bonded device + */ +void +bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev); + +#endif /* RTE_ETH_BOND_8023AD_H_ */ diff --git a/drivers/bonding/rte_eth_bond_alb.c b/drivers/bonding/rte_eth_bond_alb.c new file mode 100644 index 0000000..6df318e --- /dev/null +++ b/drivers/bonding/rte_eth_bond_alb.c @@ -0,0 +1,287 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rte_eth_bond_private.h" +#include "rte_eth_bond_alb.h" + +static inline uint8_t +simple_hash(uint8_t *hash_start, int hash_size) +{ + int i; + uint8_t hash; + + hash = 0; + for (i = 0; i < hash_size; ++i) + hash ^= hash_start[i]; + + return hash; +} + +static uint8_t +calculate_slave(struct bond_dev_private *internals) +{ + uint8_t idx; + + idx = (internals->mode6.last_slave + 1) % internals->active_slave_count; + internals->mode6.last_slave = idx; + return internals->active_slaves[idx]; +} + +int +bond_mode_alb_enable(struct rte_eth_dev *bond_dev) +{ + struct bond_dev_private *internals = bond_dev->data->dev_private; + struct client_data *hash_table = internals->mode6.client_table; + + uint16_t data_size; + char mem_name[RTE_ETH_NAME_MAX_LEN]; + int socket_id = bond_dev->pci_dev->numa_node; + + /* Fill hash table with initial values */ + memset(hash_table, 0, sizeof(struct client_data) * ALB_HASH_TABLE_SIZE); + rte_spinlock_init(&internals->mode6.lock); + internals->mode6.last_slave = ALB_NULL_INDEX; + internals->mode6.ntt = 0; + + /* Initialize memory pool for ARP packets to send */ + if (internals->mode6.mempool == NULL) { + /* + * 256 is size of ETH header, ARP header and nested VLAN headers. + * The value is chosen to be cache aligned. + */ + data_size = 256 + RTE_PKTMBUF_HEADROOM; + snprintf(mem_name, sizeof(mem_name), "%s_MODE6", bond_dev->data->name); + internals->mode6.mempool = rte_pktmbuf_pool_create(mem_name, + 512 * RTE_MAX_ETHPORTS, + RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ? 
+ 32 : RTE_MEMPOOL_CACHE_MAX_SIZE, + 0, data_size, socket_id); + + if (internals->mode6.mempool == NULL) { + RTE_LOG(ERR, PMD, "%s: Failed to initialize ALB mempool.\n", + bond_dev->data->name); + rte_panic( + "Failed to allocate memory pool ('%s')\n" + "for bond device '%s'\n", + mem_name, bond_dev->data->name); + } + } + + return 0; +} + +void bond_mode_alb_arp_recv(struct ether_hdr *eth_h, uint16_t offset, + struct bond_dev_private *internals) { + struct arp_hdr *arp; + + struct client_data *hash_table = internals->mode6.client_table; + struct client_data *client_info; + + uint8_t hash_index; + + arp = (struct arp_hdr *) ((char *) (eth_h + 1) + offset); + + /* ARP Requests are forwarded to the application with no changes */ + if (arp->arp_op != rte_cpu_to_be_16(ARP_OP_REPLY)) + return; + + /* From now on, we analyze only ARP Reply packets */ + hash_index = simple_hash((uint8_t *) &arp->arp_data.arp_sip, + sizeof(arp->arp_data.arp_sip)); + client_info = &hash_table[hash_index]; + + /* + * We got reply for ARP Request send by the application. We need to + * update client table when received data differ from what is stored + * in ALB table and issue sending update packet to that slave. + */ + rte_spinlock_lock(&internals->mode6.lock); + if (client_info->in_use == 0 || + client_info->app_ip != arp->arp_data.arp_tip || + client_info->cli_ip != arp->arp_data.arp_sip || + !is_same_ether_addr(&client_info->cli_mac, &arp->arp_data.arp_sha) || + client_info->vlan_count != offset / sizeof(struct vlan_hdr) || + memcmp(client_info->vlan, eth_h + 1, offset) != 0 + ) { + client_info->in_use = 1; + client_info->app_ip = arp->arp_data.arp_tip; + client_info->cli_ip = arp->arp_data.arp_sip; + ether_addr_copy(&arp->arp_data.arp_sha, &client_info->cli_mac); + client_info->slave_idx = calculate_slave(internals); + rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac); + ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_tha); + memcpy(client_info->vlan, eth_h + 1, offset); + client_info->vlan_count = offset / sizeof(struct vlan_hdr); + } + internals->mode6.ntt = 1; + rte_spinlock_unlock(&internals->mode6.lock); +} + +uint8_t +bond_mode_alb_arp_xmit(struct ether_hdr *eth_h, uint16_t offset, + struct bond_dev_private *internals) +{ + struct arp_hdr *arp; + + struct client_data *hash_table = internals->mode6.client_table; + struct client_data *client_info; + + uint8_t hash_index; + + struct ether_addr bonding_mac; + + arp = (struct arp_hdr *)((char *)(eth_h + 1) + offset); + + /* + * Traffic with src MAC other than bonding should be sent on + * current primary port. 
+ */ + rte_eth_macaddr_get(internals->port_id, &bonding_mac); + if (!is_same_ether_addr(&bonding_mac, &arp->arp_data.arp_sha)) { + rte_eth_macaddr_get(internals->current_primary_port, + &arp->arp_data.arp_sha); + return internals->current_primary_port; + } + + hash_index = simple_hash((uint8_t *)&arp->arp_data.arp_tip, + sizeof(uint32_t)); + client_info = &hash_table[hash_index]; + + rte_spinlock_lock(&internals->mode6.lock); + if (arp->arp_op == rte_cpu_to_be_16(ARP_OP_REPLY)) { + if (client_info->in_use) { + if (client_info->app_ip == arp->arp_data.arp_sip && + client_info->cli_ip == arp->arp_data.arp_tip) { + /* Entry is already assigned to this client */ + if (!is_broadcast_ether_addr(&arp->arp_data.arp_tha)) { + ether_addr_copy(&arp->arp_data.arp_tha, + &client_info->cli_mac); + } + rte_eth_macaddr_get(client_info->slave_idx, + &client_info->app_mac); + ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_sha); + memcpy(client_info->vlan, eth_h + 1, offset); + client_info->vlan_count = offset / sizeof(struct vlan_hdr); + rte_spinlock_unlock(&internals->mode6.lock); + return client_info->slave_idx; + } + } + + /* Assign new slave to this client and update src mac in ARP */ + client_info->in_use = 1; + client_info->ntt = 0; + client_info->app_ip = arp->arp_data.arp_sip; + ether_addr_copy(&arp->arp_data.arp_tha, &client_info->cli_mac); + client_info->cli_ip = arp->arp_data.arp_tip; + client_info->slave_idx = calculate_slave(internals); + rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac); + ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_sha); + memcpy(client_info->vlan, eth_h + 1, offset); + client_info->vlan_count = offset / sizeof(struct vlan_hdr); + rte_spinlock_unlock(&internals->mode6.lock); + return client_info->slave_idx; + } + + /* If packet is not ARP Reply, send it on current primary port. 
*/ + rte_spinlock_unlock(&internals->mode6.lock); + rte_eth_macaddr_get(internals->current_primary_port, + &arp->arp_data.arp_sha); + return internals->current_primary_port; +} + +uint8_t +bond_mode_alb_arp_upd(struct client_data *client_info, + struct rte_mbuf *pkt, struct bond_dev_private *internals) +{ + struct ether_hdr *eth_h; + struct arp_hdr *arp_h; + uint8_t slave_idx; + + rte_spinlock_lock(&internals->mode6.lock); + eth_h = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + + ether_addr_copy(&client_info->app_mac, ð_h->s_addr); + ether_addr_copy(&client_info->cli_mac, ð_h->d_addr); + if (client_info->vlan_count > 0) + eth_h->ether_type = rte_cpu_to_be_16(ETHER_TYPE_VLAN); + else + eth_h->ether_type = rte_cpu_to_be_16(ETHER_TYPE_ARP); + + arp_h = (struct arp_hdr *)((char *)eth_h + sizeof(struct ether_hdr) + + client_info->vlan_count * sizeof(struct vlan_hdr)); + + memcpy(eth_h + 1, client_info->vlan, + client_info->vlan_count * sizeof(struct vlan_hdr)); + + ether_addr_copy(&client_info->app_mac, &arp_h->arp_data.arp_sha); + arp_h->arp_data.arp_sip = client_info->app_ip; + ether_addr_copy(&client_info->cli_mac, &arp_h->arp_data.arp_tha); + arp_h->arp_data.arp_tip = client_info->cli_ip; + + arp_h->arp_hrd = rte_cpu_to_be_16(ARP_HRD_ETHER); + arp_h->arp_pro = rte_cpu_to_be_16(ETHER_TYPE_IPv4); + arp_h->arp_hln = ETHER_ADDR_LEN; + arp_h->arp_pln = sizeof(uint32_t); + arp_h->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY); + + slave_idx = client_info->slave_idx; + rte_spinlock_unlock(&internals->mode6.lock); + + return slave_idx; +} + +void +bond_mode_alb_client_list_upd(struct rte_eth_dev *bond_dev) +{ + struct bond_dev_private *internals = bond_dev->data->dev_private; + struct client_data *client_info; + + int i; + + /* If active slave count is 0, it's pointless to refresh alb table */ + if (internals->active_slave_count <= 0) + return; + + rte_spinlock_lock(&internals->mode6.lock); + internals->mode6.last_slave = ALB_NULL_INDEX; + + for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) { + client_info = &internals->mode6.client_table[i]; + if (client_info->in_use) { + client_info->slave_idx = calculate_slave(internals); + rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac); + internals->mode6.ntt = 1; + } + } + rte_spinlock_unlock(&internals->mode6.lock); +} diff --git a/drivers/bonding/rte_eth_bond_alb.h b/drivers/bonding/rte_eth_bond_alb.h new file mode 100644 index 0000000..fd7c3ae --- /dev/null +++ b/drivers/bonding/rte_eth_bond_alb.h @@ -0,0 +1,142 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RTE_ETH_BOND_ALB_H_ +#define RTE_ETH_BOND_ALB_H_ + +#include +#include + +#define ALB_HASH_TABLE_SIZE 256 +#define ALB_NULL_INDEX 0xFFFFFFFF + +struct client_data { + /** ARP data of single client */ + struct ether_addr app_mac; + /**< MAC address of application running DPDK */ + uint32_t app_ip; + /**< IP address of application running DPDK */ + struct ether_addr cli_mac; + /**< Client MAC address */ + uint32_t cli_ip; + /**< Client IP address */ + + uint8_t slave_idx; + /**< Index of slave on which we connect with that client */ + uint8_t in_use; + /**< Flag indicating if entry in client table is currently used */ + uint8_t ntt; + /**< Flag indicating if we need to send update to this client on next tx */ + + struct vlan_hdr vlan[2]; + /**< Content of vlan headers */ + uint8_t vlan_count; + /**< Number of nested vlan headers */ +}; + +struct mode_alb_private { + struct client_data client_table[ALB_HASH_TABLE_SIZE]; + /**< Hash table storing ARP data of every client connected */ + struct rte_mempool *mempool; + /**< Mempool for creating ARP update packets */ + uint8_t ntt; + /**< Flag indicating if we need to send update to any client on next tx */ + uint32_t last_slave; + /**< Index of last used slave in client table */ + rte_spinlock_t lock; +}; + +/** + * ALB mode initialization. + * + * @param bond_dev Pointer to bonding device. + * + * @return + * Error code - 0 on success. + */ +int +bond_mode_alb_enable(struct rte_eth_dev *bond_dev); + +/** + * Function handles ARP packet reception. If received ARP request, it is + * forwarded to application without changes. If it is ARP reply, client table + * is updated. + * + * @param eth_h ETH header of received packet. + * @param offset Vlan header offset. + * @param internals Bonding data. + */ +void +bond_mode_alb_arp_recv(struct ether_hdr *eth_h, uint16_t offset, + struct bond_dev_private *internals); + +/** + * Function handles ARP packet transmission. It also decides on which slave + * send that packet. If packet is ARP Request, it is send on primary slave. + * If it is ARP Reply, it is send on slave stored in client table for that + * connection. On Reply function also updates data in client table. + * + * @param eth_h ETH header of transmitted packet. + * @param offset Vlan header offset. + * @param internals Bonding data. + * + * @return + * Index of slave on which packet should be sent. + */ +uint8_t +bond_mode_alb_arp_xmit(struct ether_hdr *eth_h, uint16_t offset, + struct bond_dev_private *internals); + +/** + * Function fills packet with ARP data from client_info. + * + * @param client_info Data of client to which packet is sent. + * @param pkt Pointer to packet which is sent. + * @param internals Bonding data. 
+ * + * @return + * Index of slawe on which packet should be sent. + */ +uint8_t +bond_mode_alb_arp_upd(struct client_data *client_info, + struct rte_mbuf *pkt, struct bond_dev_private *internals); + +/** + * Function updates slave indexes of active connections. + * + * @param bond_dev Pointer to bonded device struct. + */ +void +bond_mode_alb_client_list_upd(struct rte_eth_dev *bond_dev); + +#endif /* RTE_ETH_BOND_ALB_H_ */ diff --git a/drivers/bonding/rte_eth_bond_api.c b/drivers/bonding/rte_eth_bond_api.c new file mode 100644 index 0000000..e91a623 --- /dev/null +++ b/drivers/bonding/rte_eth_bond_api.c @@ -0,0 +1,840 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include +#include +#include +#include + +#include "rte_eth_bond.h" +#include "rte_eth_bond_private.h" +#include "rte_eth_bond_8023ad_private.h" + +#define DEFAULT_POLLING_INTERVAL_10_MS (10) + +int +valid_bonded_ethdev(struct rte_eth_dev *eth_dev) +{ + size_t len; + + /* Check valid pointer */ + if (eth_dev->driver->pci_drv.name == NULL || driver_name == NULL) + return -1; + + /* Check string lengths are equal */ + len = strlen(driver_name); + if (strlen(eth_dev->driver->pci_drv.name) != len) + return -1; + + /* Compare strings */ + return strncmp(eth_dev->driver->pci_drv.name, driver_name, len); +} + +int +valid_port_id(uint8_t port_id) +{ + /* Verify that port id is valid */ + int ethdev_count = rte_eth_dev_count(); + if (port_id >= ethdev_count) { + RTE_BOND_LOG(ERR, "Port Id %d is greater than rte_eth_dev_count %d", + port_id, ethdev_count); + return -1; + } + + return 0; +} + +int +valid_bonded_port_id(uint8_t port_id) +{ + /* Verify that port id's are valid */ + if (valid_port_id(port_id)) + return -1; + + /* Verify that bonded_port_id refers to a bonded port */ + if (valid_bonded_ethdev(&rte_eth_devices[port_id])) { + RTE_BOND_LOG(ERR, "Specified port Id %d is not a bonded eth_dev device", + port_id); + return -1; + } + + return 0; +} + +int +valid_slave_port_id(uint8_t port_id) +{ + /* Verify that port id's are valid */ + if (valid_port_id(port_id)) + return -1; + + /* Verify that port_id refers to a non bonded port */ + if (!valid_bonded_ethdev(&rte_eth_devices[port_id])) + return -1; + + return 0; +} + +void +activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id) +{ + struct bond_dev_private *internals = eth_dev->data->dev_private; + uint8_t active_count = internals->active_slave_count; + + if (internals->mode == BONDING_MODE_8023AD) + bond_mode_8023ad_activate_slave(eth_dev, port_id); + + if (internals->mode == BONDING_MODE_TLB + || internals->mode == BONDING_MODE_ALB) { + + internals->tlb_slaves_order[active_count] = port_id; + } + + RTE_VERIFY(internals->active_slave_count < + (RTE_DIM(internals->active_slaves) - 1)); + + internals->active_slaves[internals->active_slave_count] = port_id; + internals->active_slave_count++; + + if (internals->mode == BONDING_MODE_TLB) + bond_tlb_activate_slave(internals); + if (internals->mode == BONDING_MODE_ALB) + bond_mode_alb_client_list_upd(eth_dev); +} + +void +deactivate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id) +{ + uint8_t slave_pos; + struct bond_dev_private *internals = eth_dev->data->dev_private; + uint8_t active_count = internals->active_slave_count; + + if (internals->mode == BONDING_MODE_8023AD) { + bond_mode_8023ad_stop(eth_dev); + bond_mode_8023ad_deactivate_slave(eth_dev, port_id); + } else if (internals->mode == BONDING_MODE_TLB + || internals->mode == BONDING_MODE_ALB) + bond_tlb_disable(internals); + + slave_pos = find_slave_by_id(internals->active_slaves, active_count, + port_id); + + /* If slave was not at the end of the list + * shift active slaves up active array list */ + if (slave_pos < active_count) { + active_count--; + memmove(internals->active_slaves + slave_pos, + internals->active_slaves + slave_pos + 1, + (active_count - slave_pos) * + sizeof(internals->active_slaves[0])); + } + + RTE_VERIFY(active_count < RTE_DIM(internals->active_slaves)); + internals->active_slave_count = active_count; + + if (eth_dev->data->dev_started) { + if (internals->mode == BONDING_MODE_8023AD) { + bond_mode_8023ad_start(eth_dev); + } else if (internals->mode == BONDING_MODE_TLB) { + 
bond_tlb_enable(internals); + } else if (internals->mode == BONDING_MODE_ALB) { + bond_tlb_enable(internals); + bond_mode_alb_client_list_upd(eth_dev); + } + } +} + +uint8_t +number_of_sockets(void) +{ + int sockets = 0; + int i; + const struct rte_memseg *ms = rte_eal_get_physmem_layout(); + + for (i = 0; ((i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL)); i++) { + if (sockets < ms[i].socket_id) + sockets = ms[i].socket_id; + } + + /* Number of sockets = maximum socket_id + 1 */ + return ++sockets; +} + +const char *driver_name = "Link Bonding PMD"; + +int +rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) +{ + struct rte_pci_device *pci_dev = NULL; + struct bond_dev_private *internals = NULL; + struct rte_eth_dev *eth_dev = NULL; + struct eth_driver *eth_drv = NULL; + struct rte_pci_driver *pci_drv = NULL; + struct rte_pci_id *pci_id_table = NULL; + /* now do all data allocation - for eth_dev structure, dummy pci driver + * and internal (private) data + */ + + if (name == NULL) { + RTE_BOND_LOG(ERR, "Invalid name specified"); + goto err; + } + + if (socket_id >= number_of_sockets()) { + RTE_BOND_LOG(ERR, + "Invalid socket id specified to create bonded device on."); + goto err; + } + + pci_dev = rte_zmalloc_socket(name, sizeof(*pci_dev), 0, socket_id); + if (pci_dev == NULL) { + RTE_BOND_LOG(ERR, "Unable to malloc pci dev on socket"); + goto err; + } + + eth_drv = rte_zmalloc_socket(name, sizeof(*eth_drv), 0, socket_id); + if (eth_drv == NULL) { + RTE_BOND_LOG(ERR, "Unable to malloc eth_drv on socket"); + goto err; + } + + pci_drv = ð_drv->pci_drv; + + pci_id_table = rte_zmalloc_socket(name, sizeof(*pci_id_table), 0, socket_id); + if (pci_id_table == NULL) { + RTE_BOND_LOG(ERR, "Unable to malloc pci_id_table on socket"); + goto err; + } + pci_id_table->device_id = PCI_ANY_ID; + pci_id_table->subsystem_device_id = PCI_ANY_ID; + pci_id_table->vendor_id = PCI_ANY_ID; + pci_id_table->subsystem_vendor_id = PCI_ANY_ID; + + pci_drv->id_table = pci_id_table; + pci_drv->drv_flags = RTE_PCI_DRV_INTR_LSC; + + internals = rte_zmalloc_socket(name, sizeof(*internals), 0, socket_id); + if (internals == NULL) { + RTE_BOND_LOG(ERR, "Unable to malloc internals on socket"); + goto err; + } + + /* reserve an ethdev entry */ + eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); + if (eth_dev == NULL) { + RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev"); + goto err; + } + + pci_dev->numa_node = socket_id; + pci_drv->name = driver_name; + + eth_dev->driver = eth_drv; + eth_dev->data->dev_private = internals; + eth_dev->data->nb_rx_queues = (uint16_t)1; + eth_dev->data->nb_tx_queues = (uint16_t)1; + + TAILQ_INIT(&(eth_dev->link_intr_cbs)); + + eth_dev->data->dev_link.link_status = 0; + + eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0, + socket_id); + + eth_dev->data->dev_started = 0; + eth_dev->data->promiscuous = 0; + eth_dev->data->scattered_rx = 0; + eth_dev->data->all_multicast = 0; + + eth_dev->dev_ops = &default_dev_ops; + eth_dev->pci_dev = pci_dev; + + rte_spinlock_init(&internals->lock); + + internals->port_id = eth_dev->data->port_id; + internals->mode = BONDING_MODE_INVALID; + internals->current_primary_port = 0; + internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2; + internals->xmit_hash = xmit_l2_hash; + internals->user_defined_mac = 0; + internals->link_props_set = 0; + + internals->link_status_polling_enabled = 0; + + internals->link_status_polling_interval_ms = DEFAULT_POLLING_INTERVAL_10_MS; + internals->link_down_delay_ms = 0; + 
internals->link_up_delay_ms = 0; + + internals->slave_count = 0; + internals->active_slave_count = 0; + internals->rx_offload_capa = 0; + internals->tx_offload_capa = 0; + + memset(internals->active_slaves, 0, sizeof(internals->active_slaves)); + memset(internals->slaves, 0, sizeof(internals->slaves)); + + /* Set mode 4 default configuration */ + bond_mode_8023ad_setup(eth_dev, NULL); + if (bond_ethdev_mode_set(eth_dev, mode)) { + RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d", + eth_dev->data->port_id, mode); + goto err; + } + + return eth_dev->data->port_id; + +err: + rte_free(pci_dev); + rte_free(pci_id_table); + rte_free(eth_drv); + rte_free(internals); + + return -1; +} + +static int +__eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id) +{ + struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev; + struct bond_dev_private *internals; + struct bond_dev_private *temp_internals; + struct rte_eth_link link_props; + struct rte_eth_dev_info dev_info; + + int i, j; + + if (valid_slave_port_id(slave_port_id) != 0) + return -1; + + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + internals = bonded_eth_dev->data->dev_private; + + /* Verify that new slave device is not already a slave of another + * bonded device */ + for (i = rte_eth_dev_count()-1; i >= 0; i--) { + if (valid_bonded_ethdev(&rte_eth_devices[i]) == 0) { + temp_internals = rte_eth_devices[i].data->dev_private; + + for (j = 0; j < temp_internals->slave_count; j++) { + /* Device already a slave of a bonded device */ + if (temp_internals->slaves[j].port_id == slave_port_id) { + RTE_BOND_LOG(ERR, "Slave port %d is already a slave", + slave_port_id); + return -1; + } + } + } + } + + slave_eth_dev = &rte_eth_devices[slave_port_id]; + + /* Add slave details to bonded device */ + slave_add(internals, slave_eth_dev); + + memset(&dev_info, 0, sizeof(dev_info)); + rte_eth_dev_info_get(slave_port_id, &dev_info); + + if (internals->slave_count < 1) { + /* if MAC is not user defined then use MAC of first slave add to + * bonded device */ + if (!internals->user_defined_mac) + mac_address_set(bonded_eth_dev, slave_eth_dev->data->mac_addrs); + + /* Inherit eth dev link properties from first slave */ + link_properties_set(bonded_eth_dev, + &(slave_eth_dev->data->dev_link)); + + /* Make primary slave */ + internals->primary_port = slave_port_id; + + /* Take the first dev's offload capabilities */ + internals->rx_offload_capa = dev_info.rx_offload_capa; + internals->tx_offload_capa = dev_info.tx_offload_capa; + + } else { + /* Check slave link properties are supported if props are set, + * all slaves must be the same */ + if (internals->link_props_set) { + if (link_properties_valid(&(bonded_eth_dev->data->dev_link), + &(slave_eth_dev->data->dev_link))) { + RTE_BOND_LOG(ERR, + "Slave port %d link speed/duplex not supported", + slave_port_id); + return -1; + } + } else { + link_properties_set(bonded_eth_dev, + &(slave_eth_dev->data->dev_link)); + } + internals->rx_offload_capa &= dev_info.rx_offload_capa; + internals->tx_offload_capa &= dev_info.tx_offload_capa; + } + + internals->slave_count++; + + /* Update all slave devices MACs*/ + mac_address_slaves_update(bonded_eth_dev); + + if (bonded_eth_dev->data->dev_started) { + if (slave_configure(bonded_eth_dev, slave_eth_dev) != 0) { + RTE_BOND_LOG(ERR, "rte_bond_slaves_configure: port=%d", + slave_port_id); + return -1; + } + } + + /* Register link status change callback with bonded device pointer as + * argument*/ + rte_eth_dev_callback_register(slave_port_id, 
RTE_ETH_EVENT_INTR_LSC, + bond_ethdev_lsc_event_callback, &bonded_eth_dev->data->port_id); + + /* If bonded device is started then we can add the slave to our active + * slave array */ + if (bonded_eth_dev->data->dev_started) { + rte_eth_link_get_nowait(slave_port_id, &link_props); + + if (link_props.link_status == 1) + activate_slave(bonded_eth_dev, slave_port_id); + } + return 0; + +} + +int +rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id) +{ + struct rte_eth_dev *bonded_eth_dev; + struct bond_dev_private *internals; + + int retval; + + /* Verify that port id's are valid bonded and slave ports */ + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + internals = bonded_eth_dev->data->dev_private; + + rte_spinlock_lock(&internals->lock); + + retval = __eth_bond_slave_add_lock_free(bonded_port_id, slave_port_id); + + rte_spinlock_unlock(&internals->lock); + + return retval; +} + +static int +__eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id) +{ + struct rte_eth_dev *bonded_eth_dev; + struct bond_dev_private *internals; + + int i, slave_idx; + + if (valid_slave_port_id(slave_port_id) != 0) + return -1; + + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + internals = bonded_eth_dev->data->dev_private; + + /* first remove from active slave list */ + slave_idx = find_slave_by_id(internals->active_slaves, + internals->active_slave_count, slave_port_id); + + if (slave_idx < internals->active_slave_count) + deactivate_slave(bonded_eth_dev, slave_port_id); + + slave_idx = -1; + /* now find in slave list */ + for (i = 0; i < internals->slave_count; i++) + if (internals->slaves[i].port_id == slave_port_id) { + slave_idx = i; + break; + } + + if (slave_idx < 0) { + RTE_BOND_LOG(ERR, "Couldn't find slave in port list, slave count %d", + internals->slave_count); + return -1; + } + + /* Un-register link status change callback with bonded device pointer as + * argument*/ + rte_eth_dev_callback_unregister(slave_port_id, RTE_ETH_EVENT_INTR_LSC, + bond_ethdev_lsc_event_callback, + &rte_eth_devices[bonded_port_id].data->port_id); + + /* Restore original MAC address of slave device */ + mac_address_set(&rte_eth_devices[slave_port_id], + &(internals->slaves[slave_idx].persisted_mac_addr)); + + slave_remove(internals, &rte_eth_devices[slave_port_id]); + + /* first slave in the active list will be the primary by default, + * otherwise use first device in list */ + if (internals->current_primary_port == slave_port_id) { + if (internals->active_slave_count > 0) + internals->current_primary_port = internals->active_slaves[0]; + else if (internals->slave_count > 0) + internals->current_primary_port = internals->slaves[0].port_id; + else + internals->primary_port = 0; + } + + if (internals->active_slave_count < 1) { + /* reset device link properties as no slaves are active */ + link_properties_reset(&rte_eth_devices[bonded_port_id]); + + /* if no slaves are any longer attached to bonded device and MAC is not + * user defined then clear MAC of bonded device as it will be reset + * when a new slave is added */ + if (internals->slave_count < 1 && !internals->user_defined_mac) + memset(rte_eth_devices[bonded_port_id].data->mac_addrs, 0, + sizeof(*(rte_eth_devices[bonded_port_id].data->mac_addrs))); + } + if (internals->slave_count == 0) { + internals->rx_offload_capa = 0; + internals->tx_offload_capa = 0; + } + return 0; +} + +int +rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id) 
+{ + struct rte_eth_dev *bonded_eth_dev; + struct bond_dev_private *internals; + int retval; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + internals = bonded_eth_dev->data->dev_private; + + rte_spinlock_lock(&internals->lock); + + retval = __eth_bond_slave_remove_lock_free(bonded_port_id, slave_port_id); + + rte_spinlock_unlock(&internals->lock); + + return retval; +} + +int +rte_eth_bond_mode_set(uint8_t bonded_port_id, uint8_t mode) +{ + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + return bond_ethdev_mode_set(&rte_eth_devices[bonded_port_id], mode); +} + +int +rte_eth_bond_mode_get(uint8_t bonded_port_id) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + return internals->mode; +} + +int +rte_eth_bond_primary_set(uint8_t bonded_port_id, uint8_t slave_port_id) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + if (valid_slave_port_id(slave_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + internals->user_defined_primary_port = 1; + internals->primary_port = slave_port_id; + + bond_ethdev_primary_set(internals, slave_port_id); + + return 0; +} + +int +rte_eth_bond_primary_get(uint8_t bonded_port_id) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + if (internals->slave_count < 1) + return -1; + + return internals->current_primary_port; +} + +int +rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len) +{ + struct bond_dev_private *internals; + uint8_t i; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + if (slaves == NULL) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + if (internals->slave_count > len) + return -1; + + for (i = 0; i < internals->slave_count; i++) + slaves[i] = internals->slaves[i].port_id; + + return internals->slave_count; +} + +int +rte_eth_bond_active_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], + uint8_t len) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + if (slaves == NULL) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + if (internals->active_slave_count > len) + return -1; + + memcpy(slaves, internals->active_slaves, internals->active_slave_count); + + return internals->active_slave_count; +} + +int +rte_eth_bond_mac_address_set(uint8_t bonded_port_id, + struct ether_addr *mac_addr) +{ + struct rte_eth_dev *bonded_eth_dev; + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + internals = bonded_eth_dev->data->dev_private; + + /* Set MAC Address of Bonded Device */ + if (mac_address_set(bonded_eth_dev, mac_addr)) + return -1; + + internals->user_defined_mac = 1; + + /* Update all slave devices MACs*/ + if (internals->slave_count > 0) + return mac_address_slaves_update(bonded_eth_dev); + + return 0; +} + +int +rte_eth_bond_mac_address_reset(uint8_t bonded_port_id) +{ + struct rte_eth_dev *bonded_eth_dev; + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + bonded_eth_dev = 
&rte_eth_devices[bonded_port_id]; + internals = bonded_eth_dev->data->dev_private; + + internals->user_defined_mac = 0; + + if (internals->slave_count > 0) { + /* Set MAC Address of Bonded Device */ + if (mac_address_set(bonded_eth_dev, + &internals->slaves[internals->primary_port].persisted_mac_addr) + != 0) { + RTE_BOND_LOG(ERR, "Failed to set MAC address on bonded device"); + return -1; + } + /* Update all slave devices MAC addresses */ + return mac_address_slaves_update(bonded_eth_dev); + } + /* No need to update anything as no slaves present */ + return 0; +} + +int +rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + switch (policy) { + case BALANCE_XMIT_POLICY_LAYER2: + internals->balance_xmit_policy = policy; + internals->xmit_hash = xmit_l2_hash; + break; + case BALANCE_XMIT_POLICY_LAYER23: + internals->balance_xmit_policy = policy; + internals->xmit_hash = xmit_l23_hash; + break; + case BALANCE_XMIT_POLICY_LAYER34: + internals->balance_xmit_policy = policy; + internals->xmit_hash = xmit_l34_hash; + break; + + default: + return -1; + } + return 0; +} + +int +rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + return internals->balance_xmit_policy; +} + +int +rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + internals->link_status_polling_interval_ms = internal_ms; + + return 0; +} + +int +rte_eth_bond_link_monitoring_get(uint8_t bonded_port_id) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + return internals->link_status_polling_interval_ms; +} + +int +rte_eth_bond_link_down_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms) + +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + internals->link_down_delay_ms = delay_ms; + + return 0; +} + +int +rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + return internals->link_down_delay_ms; +} + +int +rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms) + +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + internals->link_up_delay_ms = delay_ms; + + return 0; +} + +int +rte_eth_bond_link_up_prop_delay_get(uint8_t bonded_port_id) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + return internals->link_up_delay_ms; +} diff --git a/drivers/bonding/rte_eth_bond_args.c b/drivers/bonding/rte_eth_bond_args.c new file mode 100644 index 0000000..02ecde6 --- /dev/null +++ 
b/drivers/bonding/rte_eth_bond_args.c @@ -0,0 +1,278 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include + +#include "rte_eth_bond.h" +#include "rte_eth_bond_private.h" + +const char *pmd_bond_init_valid_arguments[] = { + PMD_BOND_SLAVE_PORT_KVARG, + PMD_BOND_PRIMARY_SLAVE_KVARG, + PMD_BOND_MODE_KVARG, + PMD_BOND_XMIT_POLICY_KVARG, + PMD_BOND_SOCKET_ID_KVARG, + PMD_BOND_MAC_ADDR_KVARG, + + NULL +}; + +static inline int +find_port_id_by_pci_addr(const struct rte_pci_addr *pci_addr) +{ + struct rte_pci_addr *eth_pci_addr; + unsigned i; + + for (i = 0; i < rte_eth_dev_count(); i++) { + + if (rte_eth_devices[i].pci_dev == NULL) + continue; + + eth_pci_addr = &(rte_eth_devices[i].pci_dev->addr); + + if (pci_addr->bus == eth_pci_addr->bus && + pci_addr->devid == eth_pci_addr->devid && + pci_addr->domain == eth_pci_addr->domain && + pci_addr->function == eth_pci_addr->function) + return i; + } + return -1; +} + +static inline int +find_port_id_by_dev_name(const char *name) +{ + unsigned i; + + for (i = 0; i < rte_eth_dev_count(); i++) { + if (rte_eth_devices[i].data == NULL) + continue; + + if (strcmp(rte_eth_devices[i].data->name, name) == 0) + return i; + } + return -1; +} + +/** + * Parses a port identifier string to a port id by pci address, then by name, + * and finally port id. 
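+ * For example (values are illustrative only): "0000:00:01.0" would be matched
+ * via the PCI address lookup, "eth_ring0" via the device name lookup, and a
+ * bare "2" would fall through to strtol() and be used as a numeric port id.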
+ */ +static inline int +parse_port_id(const char *port_str) +{ + struct rte_pci_addr dev_addr; + int port_id; + + /* try parsing as pci address, physical devices */ + if (eal_parse_pci_DomBDF(port_str, &dev_addr) == 0) { + port_id = find_port_id_by_pci_addr(&dev_addr); + if (port_id < 0) + return -1; + } else { + /* try parsing as device name, virtual devices */ + port_id = find_port_id_by_dev_name(port_str); + if (port_id < 0) { + char *end; + errno = 0; + + /* try parsing as port id */ + port_id = strtol(port_str, &end, 10); + if (*end != 0 || errno != 0) + return -1; + } + } + + if (port_id < 0 || port_id > RTE_MAX_ETHPORTS) { + RTE_BOND_LOG(ERR, "Slave port specified (%s) outside expected range", + port_str); + return -1; + } + return port_id; +} + +int +bond_ethdev_parse_slave_port_kvarg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + struct bond_ethdev_slave_ports *slave_ports; + + if (value == NULL || extra_args == NULL) + return -1; + + slave_ports = extra_args; + + if (strcmp(key, PMD_BOND_SLAVE_PORT_KVARG) == 0) { + int port_id = parse_port_id(value); + if (port_id < 0) { + RTE_BOND_LOG(ERR, "Invalid slave port value (%s) specified", value); + return -1; + } else + slave_ports->slaves[slave_ports->slave_count++] = + (uint8_t)port_id; + } + return 0; +} + +int +bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + uint8_t *mode; + char *endptr; + + if (value == NULL || extra_args == NULL) + return -1; + + mode = extra_args; + + errno = 0; + *mode = strtol(value, &endptr, 10); + if (*endptr != 0 || errno != 0) + return -1; + + /* validate mode value */ + switch (*mode) { + case BONDING_MODE_ROUND_ROBIN: + case BONDING_MODE_ACTIVE_BACKUP: + case BONDING_MODE_BALANCE: + case BONDING_MODE_BROADCAST: + case BONDING_MODE_8023AD: + case BONDING_MODE_TLB: + case BONDING_MODE_ALB: + return 0; + default: + RTE_BOND_LOG(ERR, "Invalid slave mode value (%s) specified", value); + return -1; + } +} + +int +bond_ethdev_parse_socket_id_kvarg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + int socket_id; + char *endptr; + + if (value == NULL || extra_args == NULL) + return -1; + + errno = 0; + socket_id = (uint8_t)strtol(value, &endptr, 10); + if (*endptr != 0 || errno != 0) + return -1; + + /* validate mode value */ + if (socket_id >= 0 && socket_id < number_of_sockets()) { + *(uint8_t *)extra_args = (uint8_t)socket_id; + return 0; + } + return -1; +} + +int +bond_ethdev_parse_primary_slave_port_id_kvarg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + int primary_slave_port_id; + + if (value == NULL || extra_args == NULL) + return -1; + + primary_slave_port_id = parse_port_id(value); + if (primary_slave_port_id < 0) + return -1; + + *(uint8_t *)extra_args = (uint8_t)primary_slave_port_id; + + return 0; +} + +int +bond_ethdev_parse_balance_xmit_policy_kvarg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + uint8_t *xmit_policy; + + if (value == NULL || extra_args == NULL) + return -1; + + xmit_policy = extra_args; + + if (strcmp(PMD_BOND_XMIT_POLICY_LAYER2_KVARG, value) == 0) + *xmit_policy = BALANCE_XMIT_POLICY_LAYER2; + else if (strcmp(PMD_BOND_XMIT_POLICY_LAYER23_KVARG, value) == 0) + *xmit_policy = BALANCE_XMIT_POLICY_LAYER23; + else if (strcmp(PMD_BOND_XMIT_POLICY_LAYER34_KVARG, value) == 0) + *xmit_policy = BALANCE_XMIT_POLICY_LAYER34; + else + return -1; + + return 0; +} + +int +bond_ethdev_parse_bond_mac_addr_kvarg(const char *key 
__rte_unused, + const char *value, void *extra_args) +{ + if (value == NULL || extra_args == NULL) + return -1; + + /* Parse MAC */ + return cmdline_parse_etheraddr(NULL, value, extra_args, + sizeof(struct ether_addr)); +} + +int +bond_ethdev_parse_time_ms_kvarg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + uint32_t time_ms; + char *endptr; + + if (value == NULL || extra_args == NULL) + return -1; + + errno = 0; + time_ms = (uint32_t)strtol(value, &endptr, 10); + if (*endptr != 0 || errno != 0) + return -1; + + *(uint32_t *)extra_args = time_ms; + + return 0; +} diff --git a/drivers/bonding/rte_eth_bond_pmd.c b/drivers/bonding/rte_eth_bond_pmd.c new file mode 100644 index 0000000..c937e6b --- /dev/null +++ b/drivers/bonding/rte_eth_bond_pmd.c @@ -0,0 +1,2269 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_eth_bond.h" +#include "rte_eth_bond_private.h" +#include "rte_eth_bond_8023ad_private.h" + +#define REORDER_PERIOD_MS 10 + +#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port) + +/* Table for statistics in mode 5 TLB */ +static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS]; + +static inline size_t +get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto) +{ + size_t vlan_offset = 0; + + if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) { + struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1); + + vlan_offset = sizeof(struct vlan_hdr); + *proto = vlan_hdr->eth_proto; + + if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) { + vlan_hdr = vlan_hdr + 1; + *proto = vlan_hdr->eth_proto; + vlan_offset += sizeof(struct vlan_hdr); + } + } + return vlan_offset; +} + +static uint16_t +bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) +{ + struct bond_dev_private *internals; + + uint16_t num_rx_slave = 0; + uint16_t num_rx_total = 0; + + int i; + + /* Cast to structure, containing bonded device's port id and queue id */ + struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; + + internals = bd_rx_q->dev_private; + + + for (i = 0; i < internals->active_slave_count && nb_pkts; i++) { + /* Offset of pointer to *bufs increases as packets are received + * from other slaves */ + num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i], + bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts); + if (num_rx_slave) { + num_rx_total += num_rx_slave; + nb_pkts -= num_rx_slave; + } + } + + return num_rx_total; +} + +static uint16_t +bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs, + uint16_t nb_pkts) +{ + struct bond_dev_private *internals; + + /* Cast to structure, containing bonded device's port id and queue id */ + struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; + + internals = bd_rx_q->dev_private; + + return rte_eth_rx_burst(internals->current_primary_port, + bd_rx_q->queue_id, bufs, nb_pkts); +} + +static uint16_t +bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, + uint16_t nb_pkts) +{ + /* Cast to structure, containing bonded device's port id and queue id */ + struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; + struct bond_dev_private *internals = bd_rx_q->dev_private; + struct ether_addr bond_mac; + + struct ether_hdr *hdr; + + const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW); + uint16_t num_rx_total = 0; /* Total number of received packets */ + uint8_t slaves[RTE_MAX_ETHPORTS]; + uint8_t slave_count; + + uint8_t collecting; /* current slave collecting status */ + const uint8_t promisc = internals->promiscuous_en; + uint8_t i, j, k; + + rte_eth_macaddr_get(internals->port_id, &bond_mac); + /* Copy slave list to protect against slave up/down changes during tx + * bursting */ + slave_count = internals->active_slave_count; + memcpy(slaves, internals->active_slaves, + sizeof(internals->active_slaves[0]) * slave_count); + + for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) { + j = num_rx_total; + collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING); + + /* Read packets from this slave */ + num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id, + &bufs[num_rx_total], nb_pkts - num_rx_total); + + for (k = j; k < 2 && k < num_rx_total; k++) + rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *)); + + /* 
Handle slow protocol packets. */ + while (j < num_rx_total) { + if (j + 3 < num_rx_total) + rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *)); + + hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *); + /* Remove packet from array if it is slow packet or slave is not + * in collecting state or bonding interface is not in promiscuous + * mode and packet address does not match. */ + if (unlikely(hdr->ether_type == ether_type_slow_be || + !collecting || (!promisc && + !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) { + + if (hdr->ether_type == ether_type_slow_be) { + bond_mode_8023ad_handle_slow_pkt(internals, slaves[i], + bufs[j]); + } else + rte_pktmbuf_free(bufs[j]); + + /* Packet is managed by mode 4 or dropped, shift the array */ + num_rx_total--; + if (j < num_rx_total) { + memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) * + (num_rx_total - j)); + } + } else + j++; + } + } + + return num_rx_total; +} + +#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) +uint32_t burstnumberRX; +uint32_t burstnumberTX; + +#ifdef RTE_LIBRTE_BOND_DEBUG_ALB + +static void +arp_op_name(uint16_t arp_op, char *buf) +{ + switch (arp_op) { + case ARP_OP_REQUEST: + snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request"); + return; + case ARP_OP_REPLY: + snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply"); + return; + case ARP_OP_REVREQUEST: + snprintf(buf, sizeof("Reverse ARP Request"), "%s", + "Reverse ARP Request"); + return; + case ARP_OP_REVREPLY: + snprintf(buf, sizeof("Reverse ARP Reply"), "%s", + "Reverse ARP Reply"); + return; + case ARP_OP_INVREQUEST: + snprintf(buf, sizeof("Peer Identify Request"), "%s", + "Peer Identify Request"); + return; + case ARP_OP_INVREPLY: + snprintf(buf, sizeof("Peer Identify Reply"), "%s", + "Peer Identify Reply"); + return; + default: + break; + } + snprintf(buf, sizeof("Unknown"), "%s", "Unknown"); + return; +} +#endif +#define MaxIPv4String 16 +static void +ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size) +{ + uint32_t ipv4_addr; + + ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr); + snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF, + (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF, + ipv4_addr & 0xFF); +} + +#define MAX_CLIENTS_NUMBER 128 +uint8_t active_clients; +struct client_stats_t { + uint8_t port; + uint32_t ipv4_addr; + uint32_t ipv4_rx_packets; + uint32_t ipv4_tx_packets; +}; +struct client_stats_t client_stats[MAX_CLIENTS_NUMBER]; + +static void +update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator) +{ + int i = 0; + + for (; i < MAX_CLIENTS_NUMBER; i++) { + if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) { + /* Just update the RX/TX packet count for this client */ + if (TXorRXindicator == &burstnumberRX) + client_stats[i].ipv4_rx_packets++; + else + client_stats[i].ipv4_tx_packets++; + return; + } + } + /* We have a new client.
Insert it into the table and increment its stats */ + if (TXorRXindicator == &burstnumberRX) + client_stats[active_clients].ipv4_rx_packets++; + else + client_stats[active_clients].ipv4_tx_packets++; + client_stats[active_clients].ipv4_addr = addr; + client_stats[active_clients].port = port; + active_clients++; + +} + +#ifdef RTE_LIBRTE_BOND_DEBUG_ALB +#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \ + RTE_LOG(DEBUG, PMD, \ + "%s " \ + "port:%d " \ + "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \ + "SrcIP:%s " \ + "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \ + "DstIP:%s " \ + "%s " \ + "%d\n", \ + info, \ + port, \ + eth_h->s_addr.addr_bytes[0], \ + eth_h->s_addr.addr_bytes[1], \ + eth_h->s_addr.addr_bytes[2], \ + eth_h->s_addr.addr_bytes[3], \ + eth_h->s_addr.addr_bytes[4], \ + eth_h->s_addr.addr_bytes[5], \ + src_ip, \ + eth_h->d_addr.addr_bytes[0], \ + eth_h->d_addr.addr_bytes[1], \ + eth_h->d_addr.addr_bytes[2], \ + eth_h->d_addr.addr_bytes[3], \ + eth_h->d_addr.addr_bytes[4], \ + eth_h->d_addr.addr_bytes[5], \ + dst_ip, \ + arp_op, \ + ++burstnumber) +#endif + +static void +mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h, + uint8_t port, uint32_t __attribute__((unused)) *burstnumber) +{ + struct ipv4_hdr *ipv4_h; +#ifdef RTE_LIBRTE_BOND_DEBUG_ALB + struct arp_hdr *arp_h; + char dst_ip[16]; + char ArpOp[24]; + char buf[16]; +#endif + char src_ip[16]; + + uint16_t ether_type = eth_h->ether_type; + uint16_t offset = get_vlan_offset(eth_h, &ether_type); + +#ifdef RTE_LIBRTE_BOND_DEBUG_ALB + snprintf(buf, 16, "%s", info); +#endif + + if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) { + ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset); + ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String); +#ifdef RTE_LIBRTE_BOND_DEBUG_ALB + ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String); + MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber); +#endif + update_client_stats(ipv4_h->src_addr, port, burstnumber); + } +#ifdef RTE_LIBRTE_BOND_DEBUG_ALB + else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) { + arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset); + ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String); + ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String); + arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp); + MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber); + } +#endif +} +#endif + +static uint16_t +bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) +{ + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; + struct bond_dev_private *internals = bd_tx_q->dev_private; + struct ether_hdr *eth_h; + uint16_t ether_type, offset; + uint16_t nb_recv_pkts; + int i; + + nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts); + + for (i = 0; i < nb_recv_pkts; i++) { + eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *); + ether_type = eth_h->ether_type; + offset = get_vlan_offset(eth_h, &ether_type); + + if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) { +#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) + mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX); +#endif + bond_mode_alb_arp_recv(eth_h, offset, internals); + } +#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) + else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) + mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX); +#endif + } + + return nb_recv_pkts; +} + +static uint16_t
+bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs, + uint16_t nb_pkts) +{ + struct bond_dev_private *internals; + struct bond_tx_queue *bd_tx_q; + + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts]; + uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; + + uint8_t num_of_slaves; + uint8_t slaves[RTE_MAX_ETHPORTS]; + + uint16_t num_tx_total = 0, num_tx_slave; + + static int slave_idx = 0; + int i, cslave_idx = 0, tx_fail_total = 0; + + bd_tx_q = (struct bond_tx_queue *)queue; + internals = bd_tx_q->dev_private; + + /* Copy slave list to protect against slave up/down changes during tx + * bursting */ + num_of_slaves = internals->active_slave_count; + memcpy(slaves, internals->active_slaves, + sizeof(internals->active_slaves[0]) * num_of_slaves); + + if (num_of_slaves < 1) + return num_tx_total; + + /* Populate slaves mbuf with which packets are to be sent on it */ + for (i = 0; i < nb_pkts; i++) { + cslave_idx = (slave_idx + i) % num_of_slaves; + slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i]; + } + + /* increment current slave index so the next call to tx burst starts on the + * next slave */ + slave_idx = ++cslave_idx; + + /* Send packet burst on each slave device */ + for (i = 0; i < num_of_slaves; i++) { + if (slave_nb_pkts[i] > 0) { + num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, + slave_bufs[i], slave_nb_pkts[i]); + + /* if tx burst fails move packets to end of bufs */ + if (unlikely(num_tx_slave < slave_nb_pkts[i])) { + int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave; + + tx_fail_total += tx_fail_slave; + + memcpy(&bufs[nb_pkts - tx_fail_total], + &slave_bufs[i][num_tx_slave], + tx_fail_slave * sizeof(bufs[0])); + } + num_tx_total += num_tx_slave; + } + } + + return num_tx_total; +} + +static uint16_t +bond_ethdev_tx_burst_active_backup(void *queue, + struct rte_mbuf **bufs, uint16_t nb_pkts) +{ + struct bond_dev_private *internals; + struct bond_tx_queue *bd_tx_q; + + bd_tx_q = (struct bond_tx_queue *)queue; + internals = bd_tx_q->dev_private; + + if (internals->active_slave_count < 1) + return 0; + + return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id, + bufs, nb_pkts); +} + +static inline uint16_t +ether_hash(struct ether_hdr *eth_hdr) +{ + uint16_t *word_src_addr = (uint16_t *)eth_hdr->s_addr.addr_bytes; + uint16_t *word_dst_addr = (uint16_t *)eth_hdr->d_addr.addr_bytes; + + return (word_src_addr[0] ^ word_dst_addr[0]) ^ + (word_src_addr[1] ^ word_dst_addr[1]) ^ + (word_src_addr[2] ^ word_dst_addr[2]); +} + +static inline uint32_t +ipv4_hash(struct ipv4_hdr *ipv4_hdr) +{ + return (ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr); +} + +static inline uint32_t +ipv6_hash(struct ipv6_hdr *ipv6_hdr) +{ + uint32_t *word_src_addr = (uint32_t *)&(ipv6_hdr->src_addr[0]); + uint32_t *word_dst_addr = (uint32_t *)&(ipv6_hdr->dst_addr[0]); + + return (word_src_addr[0] ^ word_dst_addr[0]) ^ + (word_src_addr[1] ^ word_dst_addr[1]) ^ + (word_src_addr[2] ^ word_dst_addr[2]) ^ + (word_src_addr[3] ^ word_dst_addr[3]); +} + +uint16_t +xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count) +{ + struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *); + + uint32_t hash = ether_hash(eth_hdr); + + return (hash ^= hash >> 8) % slave_count; +} + +uint16_t +xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count) +{ + struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *); + uint16_t proto = eth_hdr->ether_type; + size_t vlan_offset = get_vlan_offset(eth_hdr, &proto); + uint32_t hash, 
l3hash = 0; + + hash = ether_hash(eth_hdr); + + if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) { + struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + l3hash = ipv4_hash(ipv4_hdr); + + } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) { + struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + l3hash = ipv6_hash(ipv6_hdr); + } + + hash = hash ^ l3hash; + hash ^= hash >> 16; + hash ^= hash >> 8; + + return hash % slave_count; +} + +uint16_t +xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count) +{ + struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *); + uint16_t proto = eth_hdr->ether_type; + size_t vlan_offset = get_vlan_offset(eth_hdr, &proto); + + struct udp_hdr *udp_hdr = NULL; + struct tcp_hdr *tcp_hdr = NULL; + uint32_t hash, l3hash = 0, l4hash = 0; + + if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) { + struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + size_t ip_hdr_offset; + + l3hash = ipv4_hash(ipv4_hdr); + + ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) * + IPV4_IHL_MULTIPLIER; + + if (ipv4_hdr->next_proto_id == IPPROTO_TCP) { + tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + + ip_hdr_offset); + l4hash = HASH_L4_PORTS(tcp_hdr); + } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) { + udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr + + ip_hdr_offset); + l4hash = HASH_L4_PORTS(udp_hdr); + } + } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) { + struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + l3hash = ipv6_hash(ipv6_hdr); + + if (ipv6_hdr->proto == IPPROTO_TCP) { + tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1); + l4hash = HASH_L4_PORTS(tcp_hdr); + } else if (ipv6_hdr->proto == IPPROTO_UDP) { + udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1); + l4hash = HASH_L4_PORTS(udp_hdr); + } + } + + hash = l3hash ^ l4hash; + hash ^= hash >> 16; + hash ^= hash >> 8; + + return hash % slave_count; +} + +struct bwg_slave { + uint64_t bwg_left_int; + uint64_t bwg_left_remainder; + uint8_t slave; +}; + +void +bond_tlb_activate_slave(struct bond_dev_private *internals) { + int i; + + for (i = 0; i < internals->active_slave_count; i++) { + tlb_last_obytets[internals->active_slaves[i]] = 0; + } +} + +static int +bandwidth_cmp(const void *a, const void *b) +{ + const struct bwg_slave *bwg_a = a; + const struct bwg_slave *bwg_b = b; + int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int; + int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder - + (int64_t)bwg_a->bwg_left_remainder; + if (diff > 0) + return 1; + else if (diff < 0) + return -1; + else if (diff2 > 0) + return 1; + else if (diff2 < 0) + return -1; + else + return 0; +} + +static void +bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx, + struct bwg_slave *bwg_slave) +{ + struct rte_eth_link link_status; + + rte_eth_link_get(port_id, &link_status); + uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8; + if (link_bwg == 0) + return; + link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS; + bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg; + bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg; +} + +static void +bond_ethdev_update_tlb_slave_cb(void *arg) +{ + struct bond_dev_private *internals = arg; + struct rte_eth_stats slave_stats; + struct bwg_slave bwg_array[RTE_MAX_ETHPORTS]; + uint8_t slave_count; + uint64_t tx_bytes; + + uint8_t update_stats = 0; + uint8_t i, slave_id; 
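+	/* Rank the active slaves by estimated remaining bandwidth so that the
+	 * TLB transmit path tries the least loaded slave first; this callback
+	 * re-arms itself every REORDER_PERIOD_MS milliseconds. */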
+ + internals->slave_update_idx++; + + + if (internals->slave_update_idx >= REORDER_PERIOD_MS) + update_stats = 1; + + for (i = 0; i < internals->active_slave_count; i++) { + slave_id = internals->active_slaves[i]; + rte_eth_stats_get(slave_id, &slave_stats); + tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id]; + bandwidth_left(slave_id, tx_bytes, + internals->slave_update_idx, &bwg_array[i]); + bwg_array[i].slave = slave_id; + + if (update_stats) { + tlb_last_obytets[slave_id] = slave_stats.obytes; + } + } + + if (update_stats == 1) + internals->slave_update_idx = 0; + + slave_count = i; + qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp); + for (i = 0; i < slave_count; i++) + internals->tlb_slaves_order[i] = bwg_array[i].slave; + + rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb, + (struct bond_dev_private *)internals); +} + +static uint16_t +bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) +{ + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; + struct bond_dev_private *internals = bd_tx_q->dev_private; + + struct rte_eth_dev *primary_port = + &rte_eth_devices[internals->primary_port]; + uint16_t num_tx_total = 0; + uint8_t i, j; + + uint8_t num_of_slaves = internals->active_slave_count; + uint8_t slaves[RTE_MAX_ETHPORTS]; + + struct ether_hdr *ether_hdr; + struct ether_addr primary_slave_addr; + struct ether_addr active_slave_addr; + + if (num_of_slaves < 1) + return num_tx_total; + + memcpy(slaves, internals->tlb_slaves_order, + sizeof(internals->tlb_slaves_order[0]) * num_of_slaves); + + + ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr); + + if (nb_pkts > 3) { + for (i = 0; i < 3; i++) + rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*)); + } + + for (i = 0; i < num_of_slaves; i++) { + rte_eth_macaddr_get(slaves[i], &active_slave_addr); + for (j = num_tx_total; j < nb_pkts; j++) { + if (j + 3 < nb_pkts) + rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*)); + + ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *); + if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr)) + ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr); +#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) + mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX); +#endif + } + + num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, + bufs + num_tx_total, nb_pkts - num_tx_total); + + if (num_tx_total == nb_pkts) + break; + } + + return num_tx_total; +} + +void +bond_tlb_disable(struct bond_dev_private *internals) +{ + rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals); +} + +void +bond_tlb_enable(struct bond_dev_private *internals) +{ + bond_ethdev_update_tlb_slave_cb(internals); +} + +static uint16_t +bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) +{ + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; + struct bond_dev_private *internals = bd_tx_q->dev_private; + + struct ether_hdr *eth_h; + uint16_t ether_type, offset; + + struct client_data *client_info; + + /* + * We create transmit buffers for every slave and one additional to send + * through tlb. In the worst case every packet will be sent on one port. + */ + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts]; + uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 }; + + /* + * We create separate transmit buffers for update packets as they won't be + * counted in num_tx_total.
+ */ + struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE]; + uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 }; + + struct rte_mbuf *upd_pkt; + size_t pkt_size; + + uint16_t num_send, num_not_send = 0; + uint16_t num_tx_total = 0; + uint8_t slave_idx; + + int i, j; + + /* Search tx buffer for ARP packets and forward them to alb */ + for (i = 0; i < nb_pkts; i++) { + eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *); + ether_type = eth_h->ether_type; + offset = get_vlan_offset(eth_h, &ether_type); + + if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) { + slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals); + + /* Change src mac in eth header */ + rte_eth_macaddr_get(slave_idx, &eth_h->s_addr); + + /* Add packet to slave tx buffer */ + slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i]; + slave_bufs_pkts[slave_idx]++; + } else { + /* If packet is not ARP, send it with TLB policy */ + slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] = + bufs[i]; + slave_bufs_pkts[RTE_MAX_ETHPORTS]++; + } + } + + /* Update connected client ARP tables */ + if (internals->mode6.ntt) { + for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) { + client_info = &internals->mode6.client_table[i]; + + if (client_info->in_use) { + /* Allocate new packet to send ARP update on current slave */ + upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool); + if (upd_pkt == NULL) { + RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n"); + continue; + } + pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr) + + client_info->vlan_count * sizeof(struct vlan_hdr); + upd_pkt->data_len = pkt_size; + upd_pkt->pkt_len = pkt_size; + + slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt, + internals); + + /* Add packet to update tx buffer */ + update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt; + update_bufs_pkts[slave_idx]++; + } + } + internals->mode6.ntt = 0; + } + + /* Send ARP packets on proper slaves */ + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (slave_bufs_pkts[i] > 0) { + num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, + slave_bufs[i], slave_bufs_pkts[i]); + for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) { + bufs[nb_pkts - 1 - num_not_send - j] = + slave_bufs[i][nb_pkts - 1 - j]; + } + + num_tx_total += num_send; + num_not_send += slave_bufs_pkts[i] - num_send; + +#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) + /* Print TX stats including update packets */ + for (j = 0; j < slave_bufs_pkts[i]; j++) { + eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *); + mode6_debug("TX ARP:", eth_h, i, &burstnumberTX); + } +#endif + } + } + + /* Send update packets on proper slaves */ + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (update_bufs_pkts[i] > 0) { + num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i], + update_bufs_pkts[i]); + for (j = num_send; j < update_bufs_pkts[i]; j++) { + rte_pktmbuf_free(update_bufs[i][j]); + } +#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) + for (j = 0; j < update_bufs_pkts[i]; j++) { + eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *); + mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX); + } +#endif + } + } + + /* Send non-ARP packets using tlb policy */ + if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) { + num_send = bond_ethdev_tx_burst_tlb(queue, + slave_bufs[RTE_MAX_ETHPORTS], + slave_bufs_pkts[RTE_MAX_ETHPORTS]); + + for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) { + bufs[nb_pkts - 1 - num_not_send - j] = +
slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j]; + } + + num_tx_total += num_send; + num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; + } + + return num_tx_total; +} + +static uint16_t +bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs, + uint16_t nb_pkts) +{ + struct bond_dev_private *internals; + struct bond_tx_queue *bd_tx_q; + + uint8_t num_of_slaves; + uint8_t slaves[RTE_MAX_ETHPORTS]; + + uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0; + + int i, op_slave_id; + + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts]; + uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; + + bd_tx_q = (struct bond_tx_queue *)queue; + internals = bd_tx_q->dev_private; + + /* Copy slave list to protect against slave up/down changes during tx + * bursting */ + num_of_slaves = internals->active_slave_count; + memcpy(slaves, internals->active_slaves, + sizeof(internals->active_slaves[0]) * num_of_slaves); + + if (num_of_slaves < 1) + return num_tx_total; + + /* Populate slaves mbuf with the packets which are to be sent on it */ + for (i = 0; i < nb_pkts; i++) { + /* Select output slave using hash based on xmit policy */ + op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves); + + /* Populate slave mbuf arrays with mbufs for that slave */ + slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i]; + } + + /* Send packet burst on each slave device */ + for (i = 0; i < num_of_slaves; i++) { + if (slave_nb_pkts[i] > 0) { + num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, + slave_bufs[i], slave_nb_pkts[i]); + + /* if tx burst fails move packets to end of bufs */ + if (unlikely(num_tx_slave < slave_nb_pkts[i])) { + int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave; + + tx_fail_total += slave_tx_fail_count; + memcpy(&bufs[nb_pkts - tx_fail_total], + &slave_bufs[i][num_tx_slave], + slave_tx_fail_count * sizeof(bufs[0])); + } + + num_tx_total += num_tx_slave; + } + } + + return num_tx_total; +} + +static uint16_t +bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, + uint16_t nb_pkts) +{ + struct bond_dev_private *internals; + struct bond_tx_queue *bd_tx_q; + + uint8_t num_of_slaves; + uint8_t slaves[RTE_MAX_ETHPORTS]; + /* positions in slaves, not ID */ + uint8_t distributing_offsets[RTE_MAX_ETHPORTS]; + uint8_t distributing_count; + + uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0; + uint16_t i, j, op_slave_idx; + const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1; + + /* Allocate additional packets in case 8023AD mode. 
*/ + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size]; + void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL }; + + /* Total amount of packets in slave_bufs */ + uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; + /* Slow packets placed in each slave */ + uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; + + bd_tx_q = (struct bond_tx_queue *)queue; + internals = bd_tx_q->dev_private; + + /* Copy slave list to protect against slave up/down changes during tx + * bursting */ + num_of_slaves = internals->active_slave_count; + if (num_of_slaves < 1) + return num_tx_total; + + memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves); + + distributing_count = 0; + for (i = 0; i < num_of_slaves; i++) { + struct port *port = &mode_8023ad_ports[slaves[i]]; + + slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring, + slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS); + slave_nb_pkts[i] = slave_slow_nb_pkts[i]; + + for (j = 0; j < slave_slow_nb_pkts[i]; j++) + slave_bufs[i][j] = slow_pkts[j]; + + if (ACTOR_STATE(port, DISTRIBUTING)) + distributing_offsets[distributing_count++] = i; + } + + if (likely(distributing_count > 0)) { + /* Populate slaves mbuf with the packets which are to be sent on it */ + for (i = 0; i < nb_pkts; i++) { + /* Select output slave using hash based on xmit policy */ + op_slave_idx = internals->xmit_hash(bufs[i], distributing_count); + + /* Populate slave mbuf arrays with mbufs for that slave. Use only + * slaves that are currently distributing. */ + uint8_t slave_offset = distributing_offsets[op_slave_idx]; + slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i]; + slave_nb_pkts[slave_offset]++; + } + } + + /* Send packet burst on each slave device */ + for (i = 0; i < num_of_slaves; i++) { + if (slave_nb_pkts[i] == 0) + continue; + + num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, + slave_bufs[i], slave_nb_pkts[i]); + + /* If tx burst fails drop slow packets */ + for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++) + rte_pktmbuf_free(slave_bufs[i][num_tx_slave]); + + num_tx_total += num_tx_slave - slave_slow_nb_pkts[i]; + num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave; + + /* If tx burst fails move packets to end of bufs */ + if (unlikely(num_tx_slave < slave_nb_pkts[i])) { + uint16_t j = nb_pkts - num_tx_fail_total; + for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++) + bufs[j] = slave_bufs[i][num_tx_slave]; + } + } + + return num_tx_total; +} + +static uint16_t +bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs, + uint16_t nb_pkts) +{ + struct bond_dev_private *internals; + struct bond_tx_queue *bd_tx_q; + + uint8_t tx_failed_flag = 0, num_of_slaves; + uint8_t slaves[RTE_MAX_ETHPORTS]; + + uint16_t max_nb_of_tx_pkts = 0; + + int slave_tx_total[RTE_MAX_ETHPORTS]; + int i, most_successful_tx_slave = -1; + + bd_tx_q = (struct bond_tx_queue *)queue; + internals = bd_tx_q->dev_private; + + /* Copy slave list to protect against slave up/down changes during tx + * bursting */ + num_of_slaves = internals->active_slave_count; + memcpy(slaves, internals->active_slaves, + sizeof(internals->active_slaves[0]) * num_of_slaves); + + if (num_of_slaves < 1) + return 0; + + /* Increment reference count on mbufs */ + for (i = 0; i < nb_pkts; i++) + rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1); + + /* Transmit burst on each active slave */ + for (i = 0; i < num_of_slaves; i++) { + slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, + bufs, nb_pkts); + + if 
(unlikely(slave_tx_total[i] < nb_pkts)) + tx_failed_flag = 1; + + /* record the value and slave index for the slave which transmits the + * maximum number of packets */ + if (slave_tx_total[i] > max_nb_of_tx_pkts) { + max_nb_of_tx_pkts = slave_tx_total[i]; + most_successful_tx_slave = i; + } + } + + /* if slaves fail to transmit packets from burst, the calling application + * is not expected to know about multiple references to packets so we must + * handle failures of all packets except those of the most successful slave + */ + if (unlikely(tx_failed_flag)) + for (i = 0; i < num_of_slaves; i++) + if (i != most_successful_tx_slave) + while (slave_tx_total[i] < nb_pkts) + rte_pktmbuf_free(bufs[slave_tx_total[i]++]); + + return max_nb_of_tx_pkts; +} + +void +link_properties_set(struct rte_eth_dev *bonded_eth_dev, + struct rte_eth_link *slave_dev_link) +{ + struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link; + struct bond_dev_private *internals = bonded_eth_dev->data->dev_private; + + if (slave_dev_link->link_status && + bonded_eth_dev->data->dev_started) { + bonded_dev_link->link_duplex = slave_dev_link->link_duplex; + bonded_dev_link->link_speed = slave_dev_link->link_speed; + + internals->link_props_set = 1; + } +} + +void +link_properties_reset(struct rte_eth_dev *bonded_eth_dev) +{ + struct bond_dev_private *internals = bonded_eth_dev->data->dev_private; + + memset(&(bonded_eth_dev->data->dev_link), 0, + sizeof(bonded_eth_dev->data->dev_link)); + + internals->link_props_set = 0; +} + +int +link_properties_valid(struct rte_eth_link *bonded_dev_link, + struct rte_eth_link *slave_dev_link) +{ + if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex || + bonded_dev_link->link_speed != slave_dev_link->link_speed) + return -1; + + return 0; +} + +int +mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr) +{ + struct ether_addr *mac_addr; + + if (eth_dev == NULL) { + RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__); + return -1; + } + + if (dst_mac_addr == NULL) { + RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__); + return -1; + } + + mac_addr = eth_dev->data->mac_addrs; + + ether_addr_copy(mac_addr, dst_mac_addr); + return 0; +} + +int +mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr) +{ + struct ether_addr *mac_addr; + + if (eth_dev == NULL) { + RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified"); + return -1; + } + + if (new_mac_addr == NULL) { + RTE_BOND_LOG(ERR, "NULL pointer MAC specified"); + return -1; + } + + mac_addr = eth_dev->data->mac_addrs; + + /* If new MAC is different to current MAC then update */ + if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0) + memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr)); + + return 0; +} + +int +mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev) +{ + struct bond_dev_private *internals = bonded_eth_dev->data->dev_private; + int i; + + /* Update slave devices MAC addresses */ + if (internals->slave_count < 1) + return -1; + + switch (internals->mode) { + case BONDING_MODE_ROUND_ROBIN: + case BONDING_MODE_BALANCE: + case BONDING_MODE_BROADCAST: + for (i = 0; i < internals->slave_count; i++) { + if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id], + bonded_eth_dev->data->mac_addrs)) { + RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", + internals->slaves[i].port_id); + return -1; + } + } + break; + case BONDING_MODE_8023AD: + bond_mode_8023ad_mac_address_update(bonded_eth_dev); + break; + 
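+	/* In active backup, TLB and ALB modes only the current primary slave
+	 * uses the bonded MAC address; the remaining slaves keep their original
+	 * (persisted) MAC addresses, which the cases below restore. */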
case BONDING_MODE_ACTIVE_BACKUP: + case BONDING_MODE_TLB: + case BONDING_MODE_ALB: + default: + for (i = 0; i < internals->slave_count; i++) { + if (internals->slaves[i].port_id == + internals->current_primary_port) { + if (mac_address_set(&rte_eth_devices[internals->primary_port], + bonded_eth_dev->data->mac_addrs)) { + RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", + internals->current_primary_port); + return -1; + } + } else { + if (mac_address_set( + &rte_eth_devices[internals->slaves[i].port_id], + &internals->slaves[i].persisted_mac_addr)) { + RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", + internals->slaves[i].port_id); + return -1; + } + } + } + } + + return 0; +} + +int +bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode) +{ + struct bond_dev_private *internals; + + internals = eth_dev->data->dev_private; + + switch (mode) { + case BONDING_MODE_ROUND_ROBIN: + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin; + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; + break; + case BONDING_MODE_ACTIVE_BACKUP: + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup; + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup; + break; + case BONDING_MODE_BALANCE: + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance; + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; + break; + case BONDING_MODE_BROADCAST: + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast; + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; + break; + case BONDING_MODE_8023AD: + if (bond_mode_8023ad_enable(eth_dev) != 0) + return -1; + + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad; + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad; + RTE_LOG(WARNING, PMD, + "Using mode 4, it is necessary to do TX burst and RX burst " + "at least every 100ms.\n"); + break; + case BONDING_MODE_TLB: + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb; + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup; + break; + case BONDING_MODE_ALB: + if (bond_mode_alb_enable(eth_dev) != 0) + return -1; + + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb; + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb; + break; + default: + return -1; + } + + internals->mode = mode; + + return 0; +} + +int +slave_configure(struct rte_eth_dev *bonded_eth_dev, + struct rte_eth_dev *slave_eth_dev) +{ + struct bond_rx_queue *bd_rx_q; + struct bond_tx_queue *bd_tx_q; + + int errval; + uint16_t q_id; + + /* Stop slave */ + rte_eth_dev_stop(slave_eth_dev->data->port_id); + + /* Enable interrupts on slave device if supported */ + if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC) + slave_eth_dev->data->dev_conf.intr_conf.lsc = 1; + + /* Configure device */ + errval = rte_eth_dev_configure(slave_eth_dev->data->port_id, + bonded_eth_dev->data->nb_rx_queues, + bonded_eth_dev->data->nb_tx_queues, + &(slave_eth_dev->data->dev_conf)); + if (errval != 0) { + RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)", + slave_eth_dev->data->port_id, errval); + return errval; + } + + /* Setup Rx Queues */ + for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) { + bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id]; + + errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id, + bd_rx_q->nb_rx_desc, + rte_eth_dev_socket_id(slave_eth_dev->data->port_id), + &(bd_rx_q->rx_conf), bd_rx_q->mb_pool); + if (errval != 0) { + RTE_BOND_LOG(ERR, + "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)", + slave_eth_dev->data->port_id, q_id, errval); + 
return errval; + } + } + + /* Setup Tx Queues */ + for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) { + bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id]; + + errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id, + bd_tx_q->nb_tx_desc, + rte_eth_dev_socket_id(slave_eth_dev->data->port_id), + &bd_tx_q->tx_conf); + if (errval != 0) { + RTE_BOND_LOG(ERR, + "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)", + slave_eth_dev->data->port_id, q_id, errval); + return errval; + } + } + + /* Start device */ + errval = rte_eth_dev_start(slave_eth_dev->data->port_id); + if (errval != 0) { + RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)", + slave_eth_dev->data->port_id, errval); + return -1; + } + + return 0; +} + +void +slave_remove(struct bond_dev_private *internals, + struct rte_eth_dev *slave_eth_dev) +{ + uint8_t i; + + for (i = 0; i < internals->slave_count; i++) + if (internals->slaves[i].port_id == + slave_eth_dev->data->port_id) + break; + + if (i < (internals->slave_count - 1)) + memmove(&internals->slaves[i], &internals->slaves[i + 1], + sizeof(internals->slaves[0]) * + (internals->slave_count - i - 1)); + + internals->slave_count--; +} + +static void +bond_ethdev_slave_link_status_change_monitor(void *cb_arg); + +void +slave_add(struct bond_dev_private *internals, + struct rte_eth_dev *slave_eth_dev) +{ + struct bond_slave_details *slave_details = + &internals->slaves[internals->slave_count]; + + slave_details->port_id = slave_eth_dev->data->port_id; + slave_details->last_link_status = 0; + + /* If slave device doesn't support interrupts then we need to enable + * polling to monitor link status */ + if (!(slave_eth_dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) { + slave_details->link_status_poll_enabled = 1; + + if (!internals->link_status_polling_enabled) { + internals->link_status_polling_enabled = 1; + + rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000, + bond_ethdev_slave_link_status_change_monitor, + (void *)&rte_eth_devices[internals->port_id]); + } + } + + slave_details->link_status_wait_to_complete = 0; + /* Store the slave's original MAC address so it can be restored later */ + memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs, + sizeof(struct ether_addr)); +} + +void +bond_ethdev_primary_set(struct bond_dev_private *internals, + uint8_t slave_port_id) +{ + int i; + + if (internals->active_slave_count < 1) + internals->current_primary_port = slave_port_id; + else + /* Search bonded device slave ports for new proposed primary port */ + for (i = 0; i < internals->active_slave_count; i++) { + if (internals->active_slaves[i] == slave_port_id) + internals->current_primary_port = slave_port_id; + } +} + +static void +bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev); + +static int +bond_ethdev_start(struct rte_eth_dev *eth_dev) +{ + struct bond_dev_private *internals; + int i; + + /* slave eth dev will be started by bonded device */ + if (valid_bonded_ethdev(eth_dev)) { + RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)", + eth_dev->data->port_id); + return -1; + } + + eth_dev->data->dev_link.link_status = 0; + eth_dev->data->dev_started = 1; + + internals = eth_dev->data->dev_private; + + if (internals->slave_count == 0) { + RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices"); + return -1; + } + + if (internals->user_defined_mac == 0) { + struct ether_addr *new_mac_addr = NULL; + + for (i = 0; i < internals->slave_count;
i++) + if (internals->slaves[i].port_id == internals->primary_port) + new_mac_addr = &internals->slaves[i].persisted_mac_addr; + + if (new_mac_addr == NULL) + return -1; + + if (mac_address_set(eth_dev, new_mac_addr) != 0) { + RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address", + eth_dev->data->port_id); + return -1; + } + } + + /* Update all slave devices' MACs */ + if (mac_address_slaves_update(eth_dev) != 0) + return -1; + + /* If bonded device is configured in promiscuous mode then re-apply config */ + if (internals->promiscuous_en) + bond_ethdev_promiscuous_enable(eth_dev); + + /* Reconfigure each slave device if starting bonded device */ + for (i = 0; i < internals->slave_count; i++) { + if (slave_configure(eth_dev, + &(rte_eth_devices[internals->slaves[i].port_id])) != 0) { + RTE_BOND_LOG(ERR, + "bonded port (%d) failed to reconfigure slave device (%d)", + eth_dev->data->port_id, internals->slaves[i].port_id); + return -1; + } + } + + if (internals->user_defined_primary_port) + bond_ethdev_primary_set(internals, internals->primary_port); + + if (internals->mode == BONDING_MODE_8023AD) + bond_mode_8023ad_start(eth_dev); + + if (internals->mode == BONDING_MODE_TLB || + internals->mode == BONDING_MODE_ALB) + bond_tlb_enable(internals); + + return 0; +} + +static void +bond_ethdev_stop(struct rte_eth_dev *eth_dev) +{ + struct bond_dev_private *internals = eth_dev->data->dev_private; + uint8_t i; + + if (internals->mode == BONDING_MODE_8023AD) { + struct port *port; + void *pkt = NULL; + + bond_mode_8023ad_stop(eth_dev); + + /* Discard all messages to/from mode 4 state machines */ + for (i = 0; i < internals->slave_count; i++) { + port = &mode_8023ad_ports[internals->slaves[i].port_id]; + + RTE_VERIFY(port->rx_ring != NULL); + while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT) + rte_pktmbuf_free(pkt); + + RTE_VERIFY(port->tx_ring != NULL); + while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT) + rte_pktmbuf_free(pkt); + } + } + + if (internals->mode == BONDING_MODE_TLB || + internals->mode == BONDING_MODE_ALB) { + bond_tlb_disable(internals); + for (i = 0; i < internals->active_slave_count; i++) + tlb_last_obytets[internals->active_slaves[i]] = 0; + } + + internals->active_slave_count = 0; + internals->link_status_polling_enabled = 0; + + eth_dev->data->dev_link.link_status = 0; + eth_dev->data->dev_started = 0; +} + +static void +bond_ethdev_close(struct rte_eth_dev *dev __rte_unused) +{ +} + +/* forward declaration */ +static int bond_ethdev_configure(struct rte_eth_dev *dev); + +static void +bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) +{ + struct bond_dev_private *internals = dev->data->dev_private; + + dev_info->driver_name = driver_name; + dev_info->max_mac_addrs = 1; + + dev_info->max_rx_pktlen = (uint32_t)2048; + + dev_info->max_rx_queues = (uint16_t)128; + dev_info->max_tx_queues = (uint16_t)512; + + dev_info->min_rx_bufsize = 0; + dev_info->pci_dev = dev->pci_dev; + + dev_info->rx_offload_capa = internals->rx_offload_capa; + dev_info->tx_offload_capa = internals->tx_offload_capa; +} + +static int +bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, + uint16_t nb_rx_desc, unsigned int socket_id __rte_unused, + const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool) +{ + struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *) + rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue), + 0, dev->pci_dev->numa_node); + if (bd_rx_q == NULL) + return -1; + + bd_rx_q->queue_id = rx_queue_id; +
bd_rx_q->dev_private = dev->data->dev_private; + + bd_rx_q->nb_rx_desc = nb_rx_desc; + + memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf)); + bd_rx_q->mb_pool = mb_pool; + + dev->data->rx_queues[rx_queue_id] = bd_rx_q; + + return 0; +} + +static int +bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, + uint16_t nb_tx_desc, unsigned int socket_id __rte_unused, + const struct rte_eth_txconf *tx_conf) +{ + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *) + rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue), + 0, dev->pci_dev->numa_node); + + if (bd_tx_q == NULL) + return -1; + + bd_tx_q->queue_id = tx_queue_id; + bd_tx_q->dev_private = dev->data->dev_private; + + bd_tx_q->nb_tx_desc = nb_tx_desc; + memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf)); + + dev->data->tx_queues[tx_queue_id] = bd_tx_q; + + return 0; +} + +static void +bond_ethdev_rx_queue_release(void *queue) +{ + if (queue == NULL) + return; + + rte_free(queue); +} + +static void +bond_ethdev_tx_queue_release(void *queue) +{ + if (queue == NULL) + return; + + rte_free(queue); +} + +static void +bond_ethdev_slave_link_status_change_monitor(void *cb_arg) +{ + struct rte_eth_dev *bonded_ethdev, *slave_ethdev; + struct bond_dev_private *internals; + + /* Default value for polling slave found is true as we don't want to + * disable the polling thread if we cannot get the lock */ + int i, polling_slave_found = 1; + + if (cb_arg == NULL) + return; + + bonded_ethdev = (struct rte_eth_dev *)cb_arg; + internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private; + + if (!bonded_ethdev->data->dev_started || + !internals->link_status_polling_enabled) + return; + + /* If device is currently being configured then don't check slaves link + * status, wait until next period */ + if (rte_spinlock_trylock(&internals->lock)) { + if (internals->slave_count > 0) + polling_slave_found = 0; + + for (i = 0; i < internals->slave_count; i++) { + if (!internals->slaves[i].link_status_poll_enabled) + continue; + + slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id]; + polling_slave_found = 1; + + /* Update slave link status */ + (*slave_ethdev->dev_ops->link_update)(slave_ethdev, + internals->slaves[i].link_status_wait_to_complete); + + /* if link status has changed since last checked then call lsc + * event callback */ + if (slave_ethdev->data->dev_link.link_status != + internals->slaves[i].last_link_status) { + internals->slaves[i].last_link_status = + slave_ethdev->data->dev_link.link_status; + + bond_ethdev_lsc_event_callback(internals->slaves[i].port_id, + RTE_ETH_EVENT_INTR_LSC, + &bonded_ethdev->data->port_id); + } + } + rte_spinlock_unlock(&internals->lock); + } + + if (polling_slave_found) + /* Set alarm to continue monitoring link status of slave ethdev's */ + rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000, + bond_ethdev_slave_link_status_change_monitor, cb_arg); +} + +static int +bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev, + int wait_to_complete) +{ + struct bond_dev_private *internals = bonded_eth_dev->data->dev_private; + + if (!bonded_eth_dev->data->dev_started || + internals->active_slave_count == 0) { + bonded_eth_dev->data->dev_link.link_status = 0; + return 0; + } else { + struct rte_eth_dev *slave_eth_dev; + int i, link_up = 0; + + for (i = 0; i < internals->active_slave_count; i++) { + slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]]; + + (*slave_eth_dev->dev_ops->link_update)(slave_eth_dev, + 
wait_to_complete); + if (slave_eth_dev->data->dev_link.link_status == 1) { + link_up = 1; + break; + } + } + + bonded_eth_dev->data->dev_link.link_status = link_up; + } + + return 0; +} + +static void +bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) +{ + struct bond_dev_private *internals = dev->data->dev_private; + struct rte_eth_stats slave_stats; + + int i; + + /* clear bonded stats before populating from slaves */ + memset(stats, 0, sizeof(*stats)); + + for (i = 0; i < internals->slave_count; i++) { + rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats); + + stats->ipackets += slave_stats.ipackets; + stats->opackets += slave_stats.opackets; + stats->ibytes += slave_stats.ibytes; + stats->obytes += slave_stats.obytes; + stats->ierrors += slave_stats.ierrors; + stats->oerrors += slave_stats.oerrors; + stats->imcasts += slave_stats.imcasts; + stats->rx_nombuf += slave_stats.rx_nombuf; + stats->fdirmatch += slave_stats.fdirmatch; + stats->fdirmiss += slave_stats.fdirmiss; + stats->tx_pause_xon += slave_stats.tx_pause_xon; + stats->rx_pause_xon += slave_stats.rx_pause_xon; + stats->tx_pause_xoff += slave_stats.tx_pause_xoff; + stats->rx_pause_xoff += slave_stats.rx_pause_xoff; + } +} + +static void +bond_ethdev_stats_reset(struct rte_eth_dev *dev) +{ + struct bond_dev_private *internals = dev->data->dev_private; + int i; + + for (i = 0; i < internals->slave_count; i++) + rte_eth_stats_reset(internals->slaves[i].port_id); +} + +static void +bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev) +{ + struct bond_dev_private *internals = eth_dev->data->dev_private; + int i; + + internals->promiscuous_en = 1; + + switch (internals->mode) { + /* Promiscuous mode is propagated to all slaves */ + case BONDING_MODE_ROUND_ROBIN: + case BONDING_MODE_BALANCE: + case BONDING_MODE_BROADCAST: + for (i = 0; i < internals->slave_count; i++) + rte_eth_promiscuous_enable(internals->slaves[i].port_id); + break; + /* In mode 4 promiscuous mode is managed when a slave is added/removed */ + case BONDING_MODE_8023AD: + break; + /* Promiscuous mode is propagated only to primary slave */ + case BONDING_MODE_ACTIVE_BACKUP: + case BONDING_MODE_TLB: + case BONDING_MODE_ALB: + default: + rte_eth_promiscuous_enable(internals->current_primary_port); + } +} + +static void +bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev) +{ + struct bond_dev_private *internals = dev->data->dev_private; + int i; + + internals->promiscuous_en = 0; + + switch (internals->mode) { + /* Promiscuous mode is propagated to all slaves */ + case BONDING_MODE_ROUND_ROBIN: + case BONDING_MODE_BALANCE: + case BONDING_MODE_BROADCAST: + for (i = 0; i < internals->slave_count; i++) + rte_eth_promiscuous_disable(internals->slaves[i].port_id); + break; + /* In mode 4 promiscuous mode is managed when a slave is added/removed */ + case BONDING_MODE_8023AD: + break; + /* Promiscuous mode is propagated only to primary slave */ + case BONDING_MODE_ACTIVE_BACKUP: + case BONDING_MODE_TLB: + case BONDING_MODE_ALB: + default: + rte_eth_promiscuous_disable(internals->current_primary_port); + } +} + +static void +bond_ethdev_delayed_lsc_propagation(void *arg) +{ + if (arg == NULL) + return; + + _rte_eth_dev_callback_process((struct rte_eth_dev *)arg, + RTE_ETH_EVENT_INTR_LSC); +} + +void +bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, + void *param) +{ + struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev; + struct bond_dev_private *internals; + struct rte_eth_link link; + + int i, valid_slave =
0; + uint8_t active_pos; + uint8_t lsc_flag = 0; + + if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL) + return; + + bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param]; + slave_eth_dev = &rte_eth_devices[port_id]; + + if (valid_bonded_ethdev(bonded_eth_dev)) + return; + + internals = bonded_eth_dev->data->dev_private; + + /* If the device isn't started don't handle interrupts */ + if (!bonded_eth_dev->data->dev_started) + return; + + /* verify that port_id is a valid slave of bonded port */ + for (i = 0; i < internals->slave_count; i++) { + if (internals->slaves[i].port_id == port_id) { + valid_slave = 1; + break; + } + } + + if (!valid_slave) + return; + + /* Search for port in active port list */ + active_pos = find_slave_by_id(internals->active_slaves, + internals->active_slave_count, port_id); + + rte_eth_link_get_nowait(port_id, &link); + if (link.link_status) { + if (active_pos < internals->active_slave_count) + return; + + /* if no active slave ports then set this port to be primary port */ + if (internals->active_slave_count < 1) { + /* If first active slave, then change link status */ + bonded_eth_dev->data->dev_link.link_status = 1; + internals->current_primary_port = port_id; + lsc_flag = 1; + + mac_address_slaves_update(bonded_eth_dev); + + /* Inherit eth dev link properties from first active slave */ + link_properties_set(bonded_eth_dev, + &(slave_eth_dev->data->dev_link)); + } + + activate_slave(bonded_eth_dev, port_id); + + /* If user has defined the primary port then default to using it */ + if (internals->user_defined_primary_port && + internals->primary_port == port_id) + bond_ethdev_primary_set(internals, port_id); + } else { + if (active_pos == internals->active_slave_count) + return; + + /* Remove from active slave list */ + deactivate_slave(bonded_eth_dev, port_id); + + /* No active slaves, change link status to down and reset other + * link properties */ + if (internals->active_slave_count < 1) { + lsc_flag = 1; + bonded_eth_dev->data->dev_link.link_status = 0; + + link_properties_reset(bonded_eth_dev); + } + + /* Update primary id, take first active slave from list or if none + * available set to -1 */ + if (port_id == internals->current_primary_port) { + if (internals->active_slave_count > 0) + bond_ethdev_primary_set(internals, + internals->active_slaves[0]); + else + internals->current_primary_port = internals->primary_port; + } + } + + if (lsc_flag) { + /* Cancel any possible outstanding interrupts if delays are enabled */ + if (internals->link_up_delay_ms > 0 || + internals->link_down_delay_ms > 0) + rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation, + bonded_eth_dev); + + if (bonded_eth_dev->data->dev_link.link_status) { + if (internals->link_up_delay_ms > 0) + rte_eal_alarm_set(internals->link_up_delay_ms * 1000, + bond_ethdev_delayed_lsc_propagation, + (void *)bonded_eth_dev); + else + _rte_eth_dev_callback_process(bonded_eth_dev, + RTE_ETH_EVENT_INTR_LSC); + + } else { + if (internals->link_down_delay_ms > 0) + rte_eal_alarm_set(internals->link_down_delay_ms * 1000, + bond_ethdev_delayed_lsc_propagation, + (void *)bonded_eth_dev); + else + _rte_eth_dev_callback_process(bonded_eth_dev, + RTE_ETH_EVENT_INTR_LSC); + } + } +} + +struct eth_dev_ops default_dev_ops = { + .dev_start = bond_ethdev_start, + .dev_stop = bond_ethdev_stop, + .dev_close = bond_ethdev_close, + .dev_configure = bond_ethdev_configure, + .dev_infos_get = bond_ethdev_info, + .rx_queue_setup = bond_ethdev_rx_queue_setup, + .tx_queue_setup = bond_ethdev_tx_queue_setup, + 
.rx_queue_release = bond_ethdev_rx_queue_release, + .tx_queue_release = bond_ethdev_tx_queue_release, + .link_update = bond_ethdev_link_update, + .stats_get = bond_ethdev_stats_get, + .stats_reset = bond_ethdev_stats_reset, + .promiscuous_enable = bond_ethdev_promiscuous_enable, + .promiscuous_disable = bond_ethdev_promiscuous_disable +}; + +static int +bond_init(const char *name, const char *params) +{ + struct bond_dev_private *internals; + struct rte_kvargs *kvlist; + uint8_t bonding_mode, socket_id; + int arg_count, port_id; + + RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name); + + kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments); + if (kvlist == NULL) + return -1; + + /* Parse link bonding mode */ + if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) { + if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG, + &bond_ethdev_parse_slave_mode_kvarg, + &bonding_mode) != 0) { + RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n", + name); + goto parse_error; + } + } else { + RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded " + "device %s\n", name); + goto parse_error; + } + + /* Parse socket id to create bonding device on */ + arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG); + if (arg_count == 1) { + if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG, + &bond_ethdev_parse_socket_id_kvarg, &socket_id) + != 0) { + RTE_LOG(ERR, EAL, "Invalid socket Id specified for " + "bonded device %s\n", name); + goto parse_error; + } + } else if (arg_count > 1) { + RTE_LOG(ERR, EAL, "Socket Id can be specified only once for " + "bonded device %s\n", name); + goto parse_error; + } else { + socket_id = rte_socket_id(); + } + + /* Create link bonding eth device */ + port_id = rte_eth_bond_create(name, bonding_mode, socket_id); + if (port_id < 0) { + RTE_LOG(ERR, EAL, "Failed to create socket %s in mode %u on " + "socket %u.\n", name, bonding_mode, socket_id); + goto parse_error; + } + internals = rte_eth_devices[port_id].data->dev_private; + internals->kvlist = kvlist; + + RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on " + "socket %u.\n", name, port_id, bonding_mode, socket_id); + return 0; + +parse_error: + rte_kvargs_free(kvlist); + + return -1; +} + +/* this part will resolve the slave portids after all the other pdev and vdev + * have been allocated */ +static int +bond_ethdev_configure(struct rte_eth_dev *dev) +{ + char *name = dev->data->name; + struct bond_dev_private *internals = dev->data->dev_private; + struct rte_kvargs *kvlist = internals->kvlist; + int arg_count; + uint8_t port_id = dev - rte_eth_devices; + + /* + * if no kvlist, it means that this bonded device has been created + * through the bonding api. 
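/*
 * [Editorial aside, not part of the patch] The "bonding api" path referred to
 * above is the programmatic alternative to the kvargs handled by bond_init().
 * A minimal sketch using only functions exported by this driver's version map;
 * the device name, socket 0 and slave port ids 0/1 are illustrative assumptions.
 */
#include <rte_eth_bond.h>

static int
create_bonded_port_example(void)
{
	int bond_port;

	/* create an active-backup bonded device on socket 0 */
	bond_port = rte_eth_bond_create("eth_bond_example",
			BONDING_MODE_ACTIVE_BACKUP, 0);
	if (bond_port < 0)
		return -1;

	/* attach two already-probed physical ports as slaves */
	if (rte_eth_bond_slave_add(bond_port, 0) != 0 ||
			rte_eth_bond_slave_add(bond_port, 1) != 0)
		return -1;

	/* prefer port 0 while its link is up */
	return rte_eth_bond_primary_set(bond_port, 0);
}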
+ */ + if (!kvlist) + return 0; + + /* Parse MAC address for bonded device */ + arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG); + if (arg_count == 1) { + struct ether_addr bond_mac; + + if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG, + &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) { + RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n", + name); + return -1; + } + + /* Set MAC address */ + if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) { + RTE_LOG(ERR, EAL, + "Failed to set mac address on bonded device %s\n", + name); + return -1; + } + } else if (arg_count > 1) { + RTE_LOG(ERR, EAL, + "MAC address can be specified only once for bonded device %s\n", + name); + return -1; + } + + /* Parse/set balance mode transmit policy */ + arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG); + if (arg_count == 1) { + uint8_t xmit_policy; + + if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG, + &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) != + 0) { + RTE_LOG(INFO, EAL, + "Invalid xmit policy specified for bonded device %s\n", + name); + return -1; + } + + /* Set balance mode transmit policy*/ + if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) { + RTE_LOG(ERR, EAL, + "Failed to set balance xmit policy on bonded device %s\n", + name); + return -1; + } + } else if (arg_count > 1) { + RTE_LOG(ERR, EAL, + "Transmit policy can be specified only once for bonded device" + " %s\n", name); + return -1; + } + + /* Parse/add slave ports to bonded device */ + if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) { + struct bond_ethdev_slave_ports slave_ports; + unsigned i; + + memset(&slave_ports, 0, sizeof(slave_ports)); + + if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG, + &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) { + RTE_LOG(ERR, EAL, + "Failed to parse slave ports for bonded device %s\n", + name); + return -1; + } + + for (i = 0; i < slave_ports.slave_count; i++) { + if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) { + RTE_LOG(ERR, EAL, + "Failed to add port %d as slave to bonded device %s\n", + slave_ports.slaves[i], name); + } + } + + } else { + RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name); + return -1; + } + + /* Parse/set primary slave port id*/ + arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG); + if (arg_count == 1) { + uint8_t primary_slave_port_id; + + if (rte_kvargs_process(kvlist, + PMD_BOND_PRIMARY_SLAVE_KVARG, + &bond_ethdev_parse_primary_slave_port_id_kvarg, + &primary_slave_port_id) < 0) { + RTE_LOG(INFO, EAL, + "Invalid primary slave port id specified for bonded device" + " %s\n", name); + return -1; + } + + /* Set balance mode transmit policy*/ + if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id) + != 0) { + RTE_LOG(ERR, EAL, + "Failed to set primary slave port %d on bonded device %s\n", + primary_slave_port_id, name); + return -1; + } + } else if (arg_count > 1) { + RTE_LOG(INFO, EAL, + "Primary slave can be specified only once for bonded device" + " %s\n", name); + return -1; + } + + /* Parse link status monitor polling interval */ + arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG); + if (arg_count == 1) { + uint32_t lsc_poll_interval_ms; + + if (rte_kvargs_process(kvlist, + PMD_BOND_LSC_POLL_PERIOD_KVARG, + &bond_ethdev_parse_time_ms_kvarg, + &lsc_poll_interval_ms) < 0) { + RTE_LOG(INFO, EAL, + "Invalid lsc polling interval value specified for bonded" + " device 
%s\n", name); + return -1; + } + + if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms) + != 0) { + RTE_LOG(ERR, EAL, + "Failed to set lsc monitor polling interval (%u ms) on" + " bonded device %s\n", lsc_poll_interval_ms, name); + return -1; + } + } else if (arg_count > 1) { + RTE_LOG(INFO, EAL, + "LSC polling interval can be specified only once for bonded" + " device %s\n", name); + return -1; + } + + /* Parse link up interrupt propagation delay */ + arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG); + if (arg_count == 1) { + uint32_t link_up_delay_ms; + + if (rte_kvargs_process(kvlist, + PMD_BOND_LINK_UP_PROP_DELAY_KVARG, + &bond_ethdev_parse_time_ms_kvarg, + &link_up_delay_ms) < 0) { + RTE_LOG(INFO, EAL, + "Invalid link up propagation delay value specified for" + " bonded device %s\n", name); + return -1; + } + + /* Set balance mode transmit policy*/ + if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms) + != 0) { + RTE_LOG(ERR, EAL, + "Failed to set link up propagation delay (%u ms) on bonded" + " device %s\n", link_up_delay_ms, name); + return -1; + } + } else if (arg_count > 1) { + RTE_LOG(INFO, EAL, + "Link up propagation delay can be specified only once for" + " bonded device %s\n", name); + return -1; + } + + /* Parse link down interrupt propagation delay */ + arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG); + if (arg_count == 1) { + uint32_t link_down_delay_ms; + + if (rte_kvargs_process(kvlist, + PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG, + &bond_ethdev_parse_time_ms_kvarg, + &link_down_delay_ms) < 0) { + RTE_LOG(INFO, EAL, + "Invalid link down propagation delay value specified for" + " bonded device %s\n", name); + return -1; + } + + /* Set balance mode transmit policy*/ + if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms) + != 0) { + RTE_LOG(ERR, EAL, + "Failed to set link down propagation delay (%u ms) on" + " bonded device %s\n", link_down_delay_ms, name); + return -1; + } + } else if (arg_count > 1) { + RTE_LOG(INFO, EAL, + "Link down propagation delay can be specified only once for" + " bonded device %s\n", name); + return -1; + } + + return 0; +} + +static struct rte_driver bond_drv = { + .name = "eth_bond", + .type = PMD_VDEV, + .init = bond_init, +}; + +PMD_REGISTER_DRIVER(bond_drv); diff --git a/drivers/bonding/rte_eth_bond_private.h b/drivers/bonding/rte_eth_bond_private.h new file mode 100644 index 0000000..45e5c65 --- /dev/null +++ b/drivers/bonding/rte_eth_bond_private.h @@ -0,0 +1,287 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
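/*
 * [Editorial aside, not part of the patch] The kvargs parsed by
 * bond_ethdev_configure() above correspond to EAL virtual-device arguments,
 * so a bonded device can also be requested on the command line. Illustrative
 * only; the exact slave identifier format accepted by the args parser is not
 * shown in this hunk and is an assumption here:
 *
 *   --vdev 'eth_bond0,mode=1,slave=0000:02:00.0,slave=0000:02:00.1,\
 *           primary=0000:02:00.0,up_delay=500,down_delay=500'
 */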
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ETH_BOND_PRIVATE_H_ +#define _RTE_ETH_BOND_PRIVATE_H_ + +#include +#include + +#include "rte_eth_bond.h" +#include "rte_eth_bond_8023ad_private.h" +#include "rte_eth_bond_alb.h" + +#define PMD_BOND_SLAVE_PORT_KVARG ("slave") +#define PMD_BOND_PRIMARY_SLAVE_KVARG ("primary") +#define PMD_BOND_MODE_KVARG ("mode") +#define PMD_BOND_XMIT_POLICY_KVARG ("xmit_policy") +#define PMD_BOND_SOCKET_ID_KVARG ("socket_id") +#define PMD_BOND_MAC_ADDR_KVARG ("mac") +#define PMD_BOND_LSC_POLL_PERIOD_KVARG ("lsc_poll_period_ms") +#define PMD_BOND_LINK_UP_PROP_DELAY_KVARG ("up_delay") +#define PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG ("down_delay") + +#define PMD_BOND_XMIT_POLICY_LAYER2_KVARG ("l2") +#define PMD_BOND_XMIT_POLICY_LAYER23_KVARG ("l23") +#define PMD_BOND_XMIT_POLICY_LAYER34_KVARG ("l34") + +#define RTE_BOND_LOG(lvl, msg, ...) \ + RTE_LOG(lvl, PMD, "%s(%d) - " msg "\n", __func__, __LINE__, ##__VA_ARGS__) + +#define BONDING_MODE_INVALID 0xFF + +extern const char *pmd_bond_init_valid_arguments[]; + +extern const char *driver_name; + +/** Port Queue Mapping Structure */ +struct bond_rx_queue { + uint16_t queue_id; + /**< Queue Id */ + struct bond_dev_private *dev_private; + /**< Reference to eth_dev private structure */ + uint16_t nb_rx_desc; + /**< Number of RX descriptors available for the queue */ + struct rte_eth_rxconf rx_conf; + /**< Copy of RX configuration structure for queue */ + struct rte_mempool *mb_pool; + /**< Reference to mbuf pool to use for RX queue */ +}; + +struct bond_tx_queue { + uint16_t queue_id; + /**< Queue Id */ + struct bond_dev_private *dev_private; + /**< Reference to dev private structure */ + uint16_t nb_tx_desc; + /**< Number of TX descriptors available for the queue */ + struct rte_eth_txconf tx_conf; + /**< Copy of TX configuration structure for queue */ +}; + +/** Bonded slave devices structure */ +struct bond_ethdev_slave_ports { + uint8_t slaves[RTE_MAX_ETHPORTS]; /**< Slave port id array */ + uint8_t slave_count; /**< Number of slaves */ +}; + +struct bond_slave_details { + uint8_t port_id; + + uint8_t link_status_poll_enabled; + uint8_t link_status_wait_to_complete; + uint8_t last_link_status; + /**< Port Id of slave eth_dev */ + struct ether_addr persisted_mac_addr; +}; + + +typedef uint16_t (*xmit_hash_t)(const struct rte_mbuf *buf, uint8_t slave_count); + +/** Link Bonding PMD device private configuration Structure */ +struct bond_dev_private { + uint8_t port_id; /**< Port Id of Bonded Port */ + uint8_t mode; /**< Link Bonding Mode */ + + rte_spinlock_t lock; + + uint8_t primary_port; /**< Primary Slave Port */ + uint8_t current_primary_port; /**< Primary Slave Port */ + uint8_t user_defined_primary_port; + /**< Flag for whether primary port is user 
defined or not */ + + uint8_t balance_xmit_policy; + /**< Transmit policy - l2 / l23 / l34 for operation in balance mode */ + xmit_hash_t xmit_hash; + /**< Transmit policy hash function */ + + uint8_t user_defined_mac; + /**< Flag for whether MAC address is user defined or not */ + uint8_t promiscuous_en; + /**< Enabled/disable promiscuous mode on bonding device */ + uint8_t link_props_set; + /**< flag to denote if the link properties are set */ + + uint8_t link_status_polling_enabled; + uint32_t link_status_polling_interval_ms; + + uint32_t link_down_delay_ms; + uint32_t link_up_delay_ms; + + uint16_t nb_rx_queues; /**< Total number of rx queues */ + uint16_t nb_tx_queues; /**< Total number of tx queues*/ + + uint8_t active_slave_count; /**< Number of active slaves */ + uint8_t active_slaves[RTE_MAX_ETHPORTS]; /**< Active slave list */ + + uint8_t slave_count; /**< Number of bonded slaves */ + struct bond_slave_details slaves[RTE_MAX_ETHPORTS]; + /**< Arary of bonded slaves details */ + + struct mode8023ad_private mode4; + uint8_t tlb_slaves_order[RTE_MAX_ETHPORTS]; /* TLB active slaves send order */ + struct mode_alb_private mode6; + + uint32_t rx_offload_capa; /** Rx offload capability */ + uint32_t tx_offload_capa; /** Tx offload capability */ + + struct rte_kvargs *kvlist; + uint8_t slave_update_idx; +}; + +extern struct eth_dev_ops default_dev_ops; + +int +valid_bonded_ethdev(struct rte_eth_dev *eth_dev); + +/* Search given slave array to find possition of given id. + * Return slave pos or slaves_count if not found. */ +static inline uint8_t +find_slave_by_id(uint8_t *slaves, uint8_t slaves_count, uint8_t slave_id) { + + uint8_t pos; + for (pos = 0; pos < slaves_count; pos++) { + if (slave_id == slaves[pos]) + break; + } + + return pos; +} + +int +valid_port_id(uint8_t port_id); + +int +valid_bonded_port_id(uint8_t port_id); + +int +valid_slave_port_id(uint8_t port_id); + +void +deactivate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id); + +void +activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id); + +void +link_properties_set(struct rte_eth_dev *bonded_eth_dev, + struct rte_eth_link *slave_dev_link); +void +link_properties_reset(struct rte_eth_dev *bonded_eth_dev); + +int +link_properties_valid(struct rte_eth_link *bonded_dev_link, + struct rte_eth_link *slave_dev_link); + +int +mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr); + +int +mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr); + +int +mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev); + +uint8_t +number_of_sockets(void); + +int +bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode); + +int +slave_configure(struct rte_eth_dev *bonded_eth_dev, + struct rte_eth_dev *slave_eth_dev); + +void +slave_remove(struct bond_dev_private *internals, + struct rte_eth_dev *slave_eth_dev); + +void +slave_add(struct bond_dev_private *internals, + struct rte_eth_dev *slave_eth_dev); + +uint16_t +xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count); + +uint16_t +xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count); + +uint16_t +xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count); + +void +bond_ethdev_primary_set(struct bond_dev_private *internals, + uint8_t slave_port_id); + +void +bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, + void *param); + +int +bond_ethdev_parse_slave_port_kvarg(const char *key __rte_unused, + const char *value, void *extra_args); + +int 
+bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused, + const char *value, void *extra_args); + +int +bond_ethdev_parse_socket_id_kvarg(const char *key __rte_unused, + const char *value, void *extra_args); + +int +bond_ethdev_parse_primary_slave_port_id_kvarg(const char *key __rte_unused, + const char *value, void *extra_args); + +int +bond_ethdev_parse_balance_xmit_policy_kvarg(const char *key __rte_unused, + const char *value, void *extra_args); + +int +bond_ethdev_parse_bond_mac_addr_kvarg(const char *key __rte_unused, + const char *value, void *extra_args); + +int +bond_ethdev_parse_time_ms_kvarg(const char *key __rte_unused, + const char *value, void *extra_args); + +void +bond_tlb_disable(struct bond_dev_private *internals); + +void +bond_tlb_enable(struct bond_dev_private *internals); + +void +bond_tlb_activate_slave(struct bond_dev_private *internals); + +#endif diff --git a/drivers/bonding/rte_eth_bond_version.map b/drivers/bonding/rte_eth_bond_version.map new file mode 100644 index 0000000..135999e --- /dev/null +++ b/drivers/bonding/rte_eth_bond_version.map @@ -0,0 +1,22 @@ +DPDK_2.0 { + global: + + rte_eth_bond_8023ad_conf_get; + rte_eth_bond_8023ad_setup; + rte_eth_bond_active_slaves_get; + rte_eth_bond_create; + rte_eth_bond_link_monitoring_set; + rte_eth_bond_mac_address_reset; + rte_eth_bond_mac_address_set; + rte_eth_bond_mode_get; + rte_eth_bond_mode_set; + rte_eth_bond_primary_get; + rte_eth_bond_primary_set; + rte_eth_bond_slave_add; + rte_eth_bond_slave_remove; + rte_eth_bond_slaves_get; + rte_eth_bond_xmit_policy_get; + rte_eth_bond_xmit_policy_set; + + local: *; +}; diff --git a/lib/Makefile b/lib/Makefile index bab721f..8270393 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -47,7 +47,6 @@ DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += librte_pmd_i40e DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += librte_pmd_fm10k DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += librte_pmd_mlx4 DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += librte_pmd_enic -DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += librte_pmd_bond DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += librte_pmd_ring DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += librte_pmd_pcap DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += librte_pmd_virtio diff --git a/lib/librte_pmd_bond/Makefile b/lib/librte_pmd_bond/Makefile deleted file mode 100644 index 83ccce3..0000000 --- a/lib/librte_pmd_bond/Makefile +++ /dev/null @@ -1,68 +0,0 @@ -# BSD LICENSE -# -# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Intel Corporation nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -include $(RTE_SDK)/mk/rte.vars.mk - -# -# library name -# -LIB = librte_pmd_bond.a - -CFLAGS += -O3 -CFLAGS += $(WERROR_FLAGS) - -EXPORT_MAP := rte_eth_bond_version.map - -LIBABIVER := 1 - -# -# all source are stored in SRCS-y -# -SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c -SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c -SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c -SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c -SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_alb.c - -# -# Export include files -# -SYMLINK-y-include += rte_eth_bond.h -SYMLINK-y-include += rte_eth_bond_8023ad.h - -# this lib depends upon: -DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_mbuf -DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_ether -DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_malloc -DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_kvargs - -include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_pmd_bond/rte_eth_bond.h b/lib/librte_pmd_bond/rte_eth_bond.h deleted file mode 100644 index d688fc3..0000000 --- a/lib/librte_pmd_bond/rte_eth_bond.h +++ /dev/null @@ -1,366 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef _RTE_ETH_BOND_H_ -#define _RTE_ETH_BOND_H_ - -/** - * @file rte_eth_bond.h - * - * RTE Link Bonding Ethernet Device - * Link Bonding for 1GbE and 10GbE ports to allow the aggregation of multiple - * (slave) NICs into a single logical interface. The bonded device processes - * these interfaces based on the mode of operation specified and supported. - * This implementation supports 4 modes of operation round robin, active backup - * balance and broadcast. Providing redundant links, fault tolerance and/or - * load balancing of network ports - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -/* Supported modes of operation of link bonding library */ - -#define BONDING_MODE_ROUND_ROBIN (0) -/**< Round Robin (Mode 0). - * In this mode all transmitted packets will be balanced equally across all - * active slaves of the bonded in a round robin fashion. */ -#define BONDING_MODE_ACTIVE_BACKUP (1) -/**< Active Backup (Mode 1). - * In this mode all packets transmitted will be transmitted on the primary - * slave until such point as the primary slave is no longer available and then - * transmitted packets will be sent on the next available slaves. The primary - * slave can be defined by the user but defaults to the first active slave - * available if not specified. */ -#define BONDING_MODE_BALANCE (2) -/**< Balance (Mode 2). - * In this mode all packets transmitted will be balanced across the available - * slaves using one of three available transmit policies - l2, l2+3 or l3+4. - * See BALANCE_XMIT_POLICY macros definitions for further details on transmit - * policies. */ -#define BONDING_MODE_BROADCAST (3) -/**< Broadcast (Mode 3). - * In this mode all transmitted packets will be transmitted on all available - * active slaves of the bonded. */ -#define BONDING_MODE_8023AD (4) -/**< 802.3AD (Mode 4). - * - * This mode provides auto negotiation/configuration - * of peers and well as link status changes monitoring using out of band - * LACP (link aggregation control protocol) messages. For further details of - * LACP specification see the IEEE 802.3ad/802.1AX standards. It is also - * described here - * https://www.kernel.org/doc/Documentation/networking/bonding.txt. - * - * Important Usage Notes: - * - for LACP mode to work the rx/tx burst functions must be invoked - * at least once every 100ms, otherwise the out-of-band LACP messages will not - * be handled with the expected latency and this may cause the link status to be - * incorrectly marked as down or failure to correctly negotiate with peers. - * - For optimal performance during initial handshaking the array of mbufs provided - * to rx_burst should be at least 2 times the slave count size. - * - */ -#define BONDING_MODE_TLB (5) -/**< Adaptive TLB (Mode 5) - * This mode provides an adaptive transmit load balancing. It dynamically - * changes the transmitting slave, according to the computed load. Statistics - * are collected in 100ms intervals and scheduled every 10ms */ -#define BONDING_MODE_ALB (6) -/**< Adaptive Load Balancing (Mode 6) - * This mode includes adaptive TLB and receive load balancing (RLB). In RLB the - * bonding driver intercepts ARP replies send by local system and overwrites its - * source MAC address, so that different peers send data to the server on - * different slave interfaces. When local system sends ARP request, it saves IP - * information from it. When ARP reply from that peer is received, its MAC is - * stored, one of slave MACs assigned and ARP reply send to that peer. 
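/*
 * [Editorial aside, not part of the patch] As the mode 4 note above says, the
 * LACP control path is only serviced from the application's rx/tx burst calls,
 * so the bonded port must be polled at least once every 100ms. A minimal
 * sketch of such a poll loop on an otherwise idle port; bond_port, queue 0 and
 * the burst size of 32 are assumptions.
 */
#include <rte_ethdev.h>
#include <rte_mbuf.h>

static void
poll_bonded_port(uint8_t bond_port)
{
	struct rte_mbuf *pkts[32];
	uint16_t nb_rx, i;

	for (;;) {
		/* rx_burst also drives reception of LACPDUs from the slaves */
		nb_rx = rte_eth_rx_burst(bond_port, 0, pkts, 32);
		for (i = 0; i < nb_rx; i++)
			rte_pktmbuf_free(pkts[i]);

		/* the tx path must be exercised regularly as well, e.g. by
		 * forwarding the received packets with rte_eth_tx_burst() */
	}
}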
- */ - -/* Balance Mode Transmit Policies */ -#define BALANCE_XMIT_POLICY_LAYER2 (0) -/**< Layer 2 (Ethernet MAC) */ -#define BALANCE_XMIT_POLICY_LAYER23 (1) -/**< Layer 2+3 (Ethernet MAC + IP Addresses) transmit load balancing */ -#define BALANCE_XMIT_POLICY_LAYER34 (2) -/**< Layer 3+4 (IP Addresses + UDP Ports) transmit load balancing */ - -/** - * Create a bonded rte_eth_dev device - * - * @param name Name of new link bonding device. - * @param mode Mode to initialize bonding device in. - * @param socket_id Socket Id on which to allocate eth_dev resources. - * - * @return - * Port Id of created rte_eth_dev on success, negative value otherwise - */ -int -rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id); - -/** - * Add a rte_eth_dev device as a slave to the bonded device - * - * @param bonded_port_id Port ID of bonded device. - * @param slave_port_id Port ID of slave device. - * - * @return - * 0 on success, negative value otherwise - */ -int -rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id); - -/** - * Remove a slave rte_eth_dev device from the bonded device - * - * @param bonded_port_id Port ID of bonded device. - * @param slave_port_id Port ID of slave device. - * - * @return - * 0 on success, negative value otherwise - */ -int -rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id); - -/** - * Set link bonding mode of bonded device - * - * @param bonded_port_id Port ID of bonded device. - * @param mode Bonding mode to set - * - * @return - * 0 on success, negative value otherwise - */ -int -rte_eth_bond_mode_set(uint8_t bonded_port_id, uint8_t mode); - -/** - * Get link bonding mode of bonded device - * - * @param bonded_port_id Port ID of bonded device. - * - * @return - * link bonding mode on success, negative value otherwise - */ -int -rte_eth_bond_mode_get(uint8_t bonded_port_id); - -/** - * Set slave rte_eth_dev as primary slave of bonded device - * - * @param bonded_port_id Port ID of bonded device. - * @param slave_port_id Port ID of slave device. - * - * @return - * 0 on success, negative value otherwise - */ -int -rte_eth_bond_primary_set(uint8_t bonded_port_id, uint8_t slave_port_id); - -/** - * Get primary slave of bonded device - * - * @param bonded_port_id Port ID of bonded device. - * - * @return - * Port Id of primary slave on success, -1 on failure - */ -int -rte_eth_bond_primary_get(uint8_t bonded_port_id); - -/** - * Populate an array with list of the slaves port id's of the bonded device - * - * @param bonded_port_id Port ID of bonded eth_dev to interrogate - * @param slaves Array to be populated with the current active slaves - * @param len Length of slaves array - * - * @return - * Number of slaves associated with bonded device on success, - * negative value otherwise - */ -int -rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len); - -/** - * Populate an array with list of the active slaves port id's of the bonded - * device. - * - * @param bonded_port_id Port ID of bonded eth_dev to interrogate - * @param slaves Array to be populated with the current active slaves - * @param len Length of slaves array - * - * @return - * Number of active slaves associated with bonded device on success, - * negative value otherwise - */ -int -rte_eth_bond_active_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], - uint8_t len); - -/** - * Set explicit MAC address to use on bonded device and it's slaves. - * - * @param bonded_port_id Port ID of bonded device. 
- * @param mac_addr MAC Address to use on bonded device overriding - * slaves MAC addresses - * - * @return - * 0 on success, negative value otherwise - */ -int -rte_eth_bond_mac_address_set(uint8_t bonded_port_id, - struct ether_addr *mac_addr); - -/** - * Reset bonded device to use MAC from primary slave on bonded device and it's - * slaves. - * - * @param bonded_port_id Port ID of bonded device. - * - * @return - * 0 on success, negative value otherwise - */ -int -rte_eth_bond_mac_address_reset(uint8_t bonded_port_id); - -/** - * Set the transmit policy for bonded device to use when it is operating in - * balance mode, this parameter is otherwise ignored in other modes of - * operation. - * - * @param bonded_port_id Port ID of bonded device. - * @param policy Balance mode transmission policy. - * - * @return - * 0 on success, negative value otherwise. - */ -int -rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy); - -/** - * Get the transmit policy set on bonded device for balance mode operation - * - * @param bonded_port_id Port ID of bonded device. - * - * @return - * Balance transmit policy on success, negative value otherwise. - */ -int -rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id); - -/** - * Set the link monitoring frequency (in ms) for monitoring the link status of - * slave devices - * - * @param bonded_port_id Port ID of bonded device. - * @param internal_ms Monitoring interval in milliseconds - * - * @return - * 0 on success, negative value otherwise. - */ - -int -rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms); - -/** - * Get the current link monitoring frequency (in ms) for monitoring of the link - * status of slave devices - * - * @param bonded_port_id Port ID of bonded device. - * - * @return - * Monitoring interval on success, negative value otherwise. - */ -int -rte_eth_bond_link_monitoring_get(uint8_t bonded_port_id); - - -/** - * Set the period in milliseconds for delaying the disabling of a bonded link - * when the link down status has been detected - * - * @param bonded_port_id Port ID of bonded device. - * @param delay_ms Delay period in milliseconds. - * - * @return - * 0 on success, negative value otherwise. - */ -int -rte_eth_bond_link_down_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms); - -/** - * Get the period in milliseconds set for delaying the disabling of a bonded - * link when the link down status has been detected - * - * @param bonded_port_id Port ID of bonded device. - * - * @return - * Delay period on success, negative value otherwise. - */ -int -rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id); - -/** - * Set the period in milliseconds for delaying the enabling of a bonded link - * when the link up status has been detected - * - * @param bonded_port_id Port ID of bonded device. - * @param delay_ms Delay period in milliseconds. - * - * @return - * 0 on success, negative value otherwise. - */ -int -rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms); - -/** - * Get the period in milliseconds set for delaying the enabling of a bonded - * link when the link up status has been detected - * - * @param bonded_port_id Port ID of bonded device. - * - * @return - * Delay period on success, negative value otherwise. 
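/*
 * [Editorial aside, not part of the patch] A short sketch of the query side of
 * this API once a bonded port exists, using only functions exported by the
 * version map; bond_port and the 100 ms polling interval are assumptions.
 */
#include <stdio.h>
#include <rte_eth_bond.h>

static void
dump_bond_state(uint8_t bond_port)
{
	uint8_t slaves[RTE_MAX_ETHPORTS];
	int n, i;

	n = rte_eth_bond_slaves_get(bond_port, slaves, RTE_MAX_ETHPORTS);
	for (i = 0; i < n; i++)
		printf("slave[%d] = port %d\n", i, slaves[i]);

	n = rte_eth_bond_active_slaves_get(bond_port, slaves, RTE_MAX_ETHPORTS);
	printf("%d active slave(s), primary is port %d\n",
			n, rte_eth_bond_primary_get(bond_port));

	/* poll slave link status every 100 ms */
	rte_eth_bond_link_monitoring_set(bond_port, 100);
}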
- */ -int -rte_eth_bond_link_up_prop_delay_get(uint8_t bonded_port_id); - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c deleted file mode 100644 index 97a828e..0000000 --- a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c +++ /dev/null @@ -1,1216 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include - -#include -#include -#include -#include - -#include "rte_eth_bond_private.h" - -#ifdef RTE_LIBRTE_BOND_DEBUG_8023AD -#define MODE4_DEBUG(fmt, ...) 
RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \ - bond_dbg_get_time_diff_ms(), slave_id, \ - __func__, ##__VA_ARGS__) - -static uint64_t start_time; - -static unsigned -bond_dbg_get_time_diff_ms(void) -{ - uint64_t now; - - now = rte_rdtsc(); - if (start_time == 0) - start_time = now; - - return ((now - start_time) * 1000) / rte_get_tsc_hz(); -} - -static void -bond_print_lacp(struct lacpdu *l) -{ - char a_address[18]; - char p_address[18]; - char a_state[256] = { 0 }; - char p_state[256] = { 0 }; - - static const char * const state_labels[] = { - "ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP" - }; - - int a_len = 0; - int p_len = 0; - uint8_t i; - uint8_t *addr; - - addr = l->actor.port_params.system.addr_bytes; - snprintf(a_address, sizeof(a_address), "%02X:%02X:%02X:%02X:%02X:%02X", - addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); - - addr = l->partner.port_params.system.addr_bytes; - snprintf(p_address, sizeof(p_address), "%02X:%02X:%02X:%02X:%02X:%02X", - addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); - - for (i = 0; i < 8; i++) { - if ((l->actor.state >> i) & 1) { - a_len += snprintf(&a_state[a_len], RTE_DIM(a_state) - a_len, "%s ", - state_labels[i]); - } - - if ((l->partner.state >> i) & 1) { - p_len += snprintf(&p_state[p_len], RTE_DIM(p_state) - p_len, "%s ", - state_labels[i]); - } - } - - if (a_len && a_state[a_len-1] == ' ') - a_state[a_len-1] = '\0'; - - if (p_len && p_state[p_len-1] == ' ') - p_state[p_len-1] = '\0'; - - RTE_LOG(DEBUG, PMD, "LACP: {\n"\ - " subtype= %02X\n"\ - " ver_num=%02X\n"\ - " actor={ tlv=%02X, len=%02X\n"\ - " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\ - " state={ %s }\n"\ - " }\n"\ - " partner={ tlv=%02X, len=%02X\n"\ - " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\ - " state={ %s }\n"\ - " }\n"\ - " collector={info=%02X, length=%02X, max_delay=%04X\n, " \ - "type_term=%02X, terminator_length = %02X}\n",\ - l->subtype,\ - l->version_number,\ - l->actor.tlv_type_info,\ - l->actor.info_length,\ - l->actor.port_params.system_priority,\ - a_address,\ - l->actor.port_params.key,\ - l->actor.port_params.port_priority,\ - l->actor.port_params.port_number,\ - a_state,\ - l->partner.tlv_type_info,\ - l->partner.info_length,\ - l->partner.port_params.system_priority,\ - p_address,\ - l->partner.port_params.key,\ - l->partner.port_params.port_priority,\ - l->partner.port_params.port_number,\ - p_state,\ - l->tlv_type_collector_info,\ - l->collector_info_length,\ - l->collector_max_delay,\ - l->tlv_type_terminator,\ - l->terminator_length); - -} -#define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu) -#else -#define BOND_PRINT_LACP(lacpdu) do { } while (0) -#define MODE4_DEBUG(fmt, ...) do { } while (0) -#endif - -static const struct ether_addr lacp_mac_addr = { - .addr_bytes = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 } -}; - -struct port mode_8023ad_ports[RTE_MAX_ETHPORTS]; - -static void -timer_cancel(uint64_t *timer) -{ - *timer = 0; -} - -static void -timer_set(uint64_t *timer, uint64_t timeout) -{ - *timer = rte_rdtsc() + timeout; -} - -/* Forces given timer to be in expired state. 
*/ -static void -timer_force_expired(uint64_t *timer) -{ - *timer = rte_rdtsc(); -} - -static bool -timer_is_stopped(uint64_t *timer) -{ - return *timer == 0; -} - -static bool -timer_is_expired(uint64_t *timer) -{ - return *timer < rte_rdtsc(); -} - -/* Timer is in running state if it is not stopped nor expired */ -static bool -timer_is_running(uint64_t *timer) -{ - return !timer_is_stopped(timer) && !timer_is_expired(timer); -} - -static void -set_warning_flags(struct port *port, uint16_t flags) -{ - int retval; - uint16_t old; - uint16_t new_flag = 0; - - do { - old = port->warnings_to_show; - new_flag = old | flags; - retval = rte_atomic16_cmpset(&port->warnings_to_show, old, new_flag); - } while (unlikely(retval == 0)); -} - -static void -show_warnings(uint8_t slave_id) -{ - struct port *port = &mode_8023ad_ports[slave_id]; - uint8_t warnings; - - do { - warnings = port->warnings_to_show; - } while (rte_atomic16_cmpset(&port->warnings_to_show, warnings, 0) == 0); - - if (!warnings) - return; - - if (!timer_is_expired(&port->warning_timer)) - return; - - - timer_set(&port->warning_timer, BOND_8023AD_WARNINGS_PERIOD_MS * - rte_get_tsc_hz() / 1000); - - if (warnings & WRN_RX_QUEUE_FULL) { - RTE_LOG(DEBUG, PMD, - "Slave %u: failed to enqueue LACP packet into RX ring.\n" - "Receive and transmit functions must be invoked on bonded\n" - "interface at least 10 times per second or LACP will not\n" - "work correctly\n", slave_id); - } - - if (warnings & WRN_TX_QUEUE_FULL) { - RTE_LOG(DEBUG, PMD, - "Slave %u: failed to enqueue LACP packet into TX ring.\n" - "Receive and transmit functions must be invoked on bonded\n" - "interface at least 10 times per second or LACP will not\n" - "work correctly\n", slave_id); - } - - if (warnings & WRN_RX_MARKER_TO_FAST) - RTE_LOG(INFO, PMD, "Slave %u: marker to early - ignoring.\n", slave_id); - - if (warnings & WRN_UNKNOWN_SLOW_TYPE) { - RTE_LOG(INFO, PMD, - "Slave %u: ignoring unknown slow protocol frame type", slave_id); - } - - if (warnings & WRN_UNKNOWN_MARKER_TYPE) - RTE_LOG(INFO, PMD, "Slave %u: ignoring unknown marker type", slave_id); - - if (warnings & WRN_NOT_LACP_CAPABLE) - MODE4_DEBUG("Port %u is not LACP capable!\n", slave_id); -} - -static void -record_default(struct port *port) -{ - /* Record default parameters for partner. Partner admin parameters - * are not implemented so set them to arbitrary default (last known) and - * mark actor that parner is in defaulted state. */ - port->partner_state = STATE_LACP_ACTIVE; - ACTOR_STATE_SET(port, DEFAULTED); -} - -/** Function handles rx state machine. - * - * This function implements Receive State Machine from point 5.4.12 in - * 802.1AX documentation. It should be called periodically. - * - * @param lacpdu LACPDU received. - * @param port Port on which LACPDU was received. 
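/*
 * [Editorial aside, not part of the patch] set_warning_flags() above uses the
 * usual lock-free read-modify-write loop so that datapath lcores can post
 * warnings without a lock. The same pattern reduced to its core; the flag word
 * and bit values are assumptions.
 */
#include <stdint.h>
#include <rte_atomic.h>

static void
set_flag_bits(volatile uint16_t *word, uint16_t bits)
{
	uint16_t old;

	do {
		old = *word;
		/* retry if another lcore updated the word in between */
	} while (rte_atomic16_cmpset(word, old, old | bits) == 0);
}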
- */ -static void -rx_machine(struct bond_dev_private *internals, uint8_t slave_id, - struct lacpdu *lacp) -{ - struct port *agg, *port = &mode_8023ad_ports[slave_id]; - uint64_t timeout; - - if (SM_FLAG(port, BEGIN)) { - /* Initialize stuff */ - MODE4_DEBUG("-> INITIALIZE\n"); - SM_FLAG_CLR(port, MOVED); - port->selected = UNSELECTED; - - record_default(port); - - ACTOR_STATE_CLR(port, EXPIRED); - timer_cancel(&port->current_while_timer); - - /* DISABLED: On initialization partner is out of sync */ - PARTNER_STATE_CLR(port, SYNCHRONIZATION); - - /* LACP DISABLED stuff if LACP not enabled on this port */ - if (!SM_FLAG(port, LACP_ENABLED)) - PARTNER_STATE_CLR(port, AGGREGATION); - else - PARTNER_STATE_SET(port, AGGREGATION); - } - - if (!SM_FLAG(port, LACP_ENABLED)) { - /* Update parameters only if state changed */ - if (!timer_is_stopped(&port->current_while_timer)) { - port->selected = UNSELECTED; - record_default(port); - PARTNER_STATE_CLR(port, AGGREGATION); - ACTOR_STATE_CLR(port, EXPIRED); - timer_cancel(&port->current_while_timer); - } - return; - } - - if (lacp) { - MODE4_DEBUG("LACP -> CURRENT\n"); - BOND_PRINT_LACP(lacp); - /* Update selected flag. If partner parameters are defaulted assume they - * are match. If not defaulted compare LACP actor with ports parner - * params. */ - if (!ACTOR_STATE(port, DEFAULTED) && - (ACTOR_STATE(port, AGGREGATION) != PARTNER_STATE(port, AGGREGATION) - || memcmp(&port->partner, &lacp->actor.port_params, - sizeof(port->partner)) != 0)) { - MODE4_DEBUG("selected <- UNSELECTED\n"); - port->selected = UNSELECTED; - } - - /* Record this PDU actor params as partner params */ - memcpy(&port->partner, &lacp->actor.port_params, - sizeof(struct port_params)); - port->partner_state = lacp->actor.state; - - /* Partner parameters are not defaulted any more */ - ACTOR_STATE_CLR(port, DEFAULTED); - - /* If LACP partner params match this port actor params */ - agg = &mode_8023ad_ports[port->aggregator_port_id]; - bool match = port->actor.system_priority == - lacp->partner.port_params.system_priority && - is_same_ether_addr(&agg->actor.system, - &lacp->partner.port_params.system) && - port->actor.port_priority == - lacp->partner.port_params.port_priority && - port->actor.port_number == - lacp->partner.port_params.port_number; - - /* Update NTT if partners information are outdated (xored and masked - * bits are set)*/ - uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT | - STATE_SYNCHRONIZATION | STATE_AGGREGATION; - - if (((port->actor_state ^ lacp->partner.state) & state_mask) || - match == false) { - SM_FLAG_SET(port, NTT); - } - - /* If LACP partner params match this port actor params */ - if (match == true && ACTOR_STATE(port, AGGREGATION) == - PARTNER_STATE(port, AGGREGATION)) - PARTNER_STATE_SET(port, SYNCHRONIZATION); - else if (!PARTNER_STATE(port, AGGREGATION) && ACTOR_STATE(port, - AGGREGATION)) - PARTNER_STATE_SET(port, SYNCHRONIZATION); - else - PARTNER_STATE_CLR(port, SYNCHRONIZATION); - - if (ACTOR_STATE(port, LACP_SHORT_TIMEOUT)) - timeout = internals->mode4.short_timeout; - else - timeout = internals->mode4.long_timeout; - - timer_set(&port->current_while_timer, timeout); - ACTOR_STATE_CLR(port, EXPIRED); - return; /* No state change */ - } - - /* If CURRENT state timer is not running (stopped or expired) - * transit to EXPIRED state from DISABLED or CURRENT */ - if (!timer_is_running(&port->current_while_timer)) { - ACTOR_STATE_SET(port, EXPIRED); - PARTNER_STATE_CLR(port, SYNCHRONIZATION); - PARTNER_STATE_SET(port, 
LACP_SHORT_TIMEOUT); - timer_set(&port->current_while_timer, internals->mode4.short_timeout); - } -} - -/** - * Function handles periodic tx state machine. - * - * Function implements Periodic Transmission state machine from point 5.4.13 - * in 802.1AX documentation. It should be called periodically. - * - * @param port Port to handle state machine. - */ -static void -periodic_machine(struct bond_dev_private *internals, uint8_t slave_id) -{ - struct port *port = &mode_8023ad_ports[slave_id]; - /* Calculate if either site is LACP enabled */ - uint64_t timeout; - uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) || - PARTNER_STATE(port, LACP_ACTIVE); - - uint8_t is_partner_fast, was_partner_fast; - /* No periodic is on BEGIN, LACP DISABLE or when both sides are pasive */ - if (SM_FLAG(port, BEGIN) || !SM_FLAG(port, LACP_ENABLED) || !active) { - timer_cancel(&port->periodic_timer); - timer_force_expired(&port->tx_machine_timer); - SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT); - - MODE4_DEBUG("-> NO_PERIODIC ( %s%s%s)\n", - SM_FLAG(port, BEGIN) ? "begind " : "", - SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ", - active ? "LACP active " : "LACP pasive "); - return; - } - - is_partner_fast = PARTNER_STATE(port, LACP_SHORT_TIMEOUT); - was_partner_fast = SM_FLAG(port, PARTNER_SHORT_TIMEOUT); - - /* If periodic timer is not started, transit from NO PERIODIC to FAST/SLOW. - * Other case: check if timer expire or partners settings changed. */ - if (!timer_is_stopped(&port->periodic_timer)) { - if (timer_is_expired(&port->periodic_timer)) { - SM_FLAG_SET(port, NTT); - } else if (is_partner_fast != was_partner_fast) { - /* Partners timeout was slow and now it is fast -> send LACP. - * In other case (was fast and now it is slow) just switch - * timeout to slow without forcing send of LACP (because standard - * say so)*/ - if (!is_partner_fast) - SM_FLAG_SET(port, NTT); - } else - return; /* Nothing changed */ - } - - /* Handle state transition to FAST/SLOW LACP timeout */ - if (is_partner_fast) { - timeout = internals->mode4.fast_periodic_timeout; - SM_FLAG_SET(port, PARTNER_SHORT_TIMEOUT); - } else { - timeout = internals->mode4.slow_periodic_timeout; - SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT); - } - - timer_set(&port->periodic_timer, timeout); -} - -/** - * Function handles mux state machine. - * - * Function implements Mux Machine from point 5.4.15 in 802.1AX documentation. - * It should be called periodically. - * - * @param port Port to handle state machine. 
- */ -static void -mux_machine(struct bond_dev_private *internals, uint8_t slave_id) -{ - struct port *port = &mode_8023ad_ports[slave_id]; - - /* Save current state for later use */ - const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING | - STATE_COLLECTING; - - /* Enter DETACHED state on BEGIN condition or from any other state if - * port was unselected */ - if (SM_FLAG(port, BEGIN) || - port->selected == UNSELECTED || (port->selected == STANDBY && - (port->actor_state & state_mask) != 0)) { - /* detach mux from aggregator */ - port->actor_state &= ~state_mask; - /* Set ntt to true if BEGIN condition or transition from any other state - * which is indicated that wait_while_timer was started */ - if (SM_FLAG(port, BEGIN) || - !timer_is_stopped(&port->wait_while_timer)) { - SM_FLAG_SET(port, NTT); - MODE4_DEBUG("-> DETACHED\n"); - } - timer_cancel(&port->wait_while_timer); - } - - if (timer_is_stopped(&port->wait_while_timer)) { - if (port->selected == SELECTED || port->selected == STANDBY) { - timer_set(&port->wait_while_timer, - internals->mode4.aggregate_wait_timeout); - - MODE4_DEBUG("DETACHED -> WAITING\n"); - } - /* Waiting state entered */ - return; - } - - /* Transit next state if port is ready */ - if (!timer_is_expired(&port->wait_while_timer)) - return; - - if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) && - !PARTNER_STATE(port, SYNCHRONIZATION)) { - /* If in COLLECTING or DISTRIBUTING state and partner becomes out of - * sync transit to ATACHED state. */ - ACTOR_STATE_CLR(port, DISTRIBUTING); - ACTOR_STATE_CLR(port, COLLECTING); - /* Clear actor sync to activate transit ATACHED in condition bellow */ - ACTOR_STATE_CLR(port, SYNCHRONIZATION); - MODE4_DEBUG("Out of sync -> ATTACHED\n"); - } - - if (!ACTOR_STATE(port, SYNCHRONIZATION)) { - /* attach mux to aggregator */ - RTE_VERIFY((port->actor_state & (STATE_COLLECTING | - STATE_DISTRIBUTING)) == 0); - - ACTOR_STATE_SET(port, SYNCHRONIZATION); - SM_FLAG_SET(port, NTT); - MODE4_DEBUG("ATTACHED Entered\n"); - } else if (!ACTOR_STATE(port, COLLECTING)) { - /* Start collecting if in sync */ - if (PARTNER_STATE(port, SYNCHRONIZATION)) { - MODE4_DEBUG("ATTACHED -> COLLECTING\n"); - ACTOR_STATE_SET(port, COLLECTING); - SM_FLAG_SET(port, NTT); - } - } else if (ACTOR_STATE(port, COLLECTING)) { - /* Check if partner is in COLLECTING state. If so this port can - * distribute frames to it */ - if (!ACTOR_STATE(port, DISTRIBUTING)) { - if (PARTNER_STATE(port, COLLECTING)) { - /* Enable DISTRIBUTING if partner is collecting */ - ACTOR_STATE_SET(port, DISTRIBUTING); - SM_FLAG_SET(port, NTT); - MODE4_DEBUG("COLLECTING -> DISTRIBUTING\n"); - RTE_LOG(INFO, PMD, - "Bond %u: slave id %u distributing started.\n", - internals->port_id, slave_id); - } - } else { - if (!PARTNER_STATE(port, COLLECTING)) { - /* Disable DISTRIBUTING (enter COLLECTING state) if partner - * is not collecting */ - ACTOR_STATE_CLR(port, DISTRIBUTING); - SM_FLAG_SET(port, NTT); - MODE4_DEBUG("DISTRIBUTING -> COLLECTING\n"); - RTE_LOG(INFO, PMD, - "Bond %u: slave id %u distributing stopped.\n", - internals->port_id, slave_id); - } - } - } -} - -/** - * Function handles transmit state machine. - * - * Function implements Transmit Machine from point 5.4.16 in 802.1AX - * documentation. 
- * - * @param port - */ -static void -tx_machine(struct bond_dev_private *internals, uint8_t slave_id) -{ - struct port *agg, *port = &mode_8023ad_ports[slave_id]; - - struct rte_mbuf *lacp_pkt = NULL; - struct lacpdu_header *hdr; - struct lacpdu *lacpdu; - - /* If periodic timer is not running periodic machine is in NO PERIODIC and - * according to 802.3ax standard tx machine should not transmit any frames - * and set ntt to false. */ - if (timer_is_stopped(&port->periodic_timer)) - SM_FLAG_CLR(port, NTT); - - if (!SM_FLAG(port, NTT)) - return; - - if (!timer_is_expired(&port->tx_machine_timer)) - return; - - lacp_pkt = rte_pktmbuf_alloc(port->mbuf_pool); - if (lacp_pkt == NULL) { - RTE_LOG(ERR, PMD, "Failed to allocate LACP packet from pool\n"); - return; - } - - lacp_pkt->data_len = sizeof(*hdr); - lacp_pkt->pkt_len = sizeof(*hdr); - - hdr = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *); - - /* Source and destination MAC */ - ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr); - rte_eth_macaddr_get(slave_id, &hdr->eth_hdr.s_addr); - hdr->eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_SLOW); - - lacpdu = &hdr->lacpdu; - memset(lacpdu, 0, sizeof(*lacpdu)); - - /* Initialize LACP part */ - lacpdu->subtype = SLOW_SUBTYPE_LACP; - lacpdu->version_number = 1; - - /* ACTOR */ - lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION; - lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params); - memcpy(&hdr->lacpdu.actor.port_params, &port->actor, - sizeof(port->actor)); - agg = &mode_8023ad_ports[port->aggregator_port_id]; - ether_addr_copy(&agg->actor.system, &hdr->lacpdu.actor.port_params.system); - lacpdu->actor.state = port->actor_state; - - /* PARTNER */ - lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION; - lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params); - memcpy(&lacpdu->partner.port_params, &port->partner, - sizeof(struct port_params)); - lacpdu->partner.state = port->partner_state; - - /* Other fields */ - lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION; - lacpdu->collector_info_length = 0x10; - lacpdu->collector_max_delay = 0; - - lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION; - lacpdu->terminator_length = 0; - - if (rte_ring_enqueue(port->tx_ring, lacp_pkt) == -ENOBUFS) { - /* If TX ring full, drop packet and free message. Retransmission - * will happen in next function call. */ - rte_pktmbuf_free(lacp_pkt); - set_warning_flags(port, WRN_TX_QUEUE_FULL); - return; - } - - MODE4_DEBUG("sending LACP frame\n"); - BOND_PRINT_LACP(lacpdu); - - timer_set(&port->tx_machine_timer, internals->mode4.tx_period_timeout); - SM_FLAG_CLR(port, NTT); -} - -/** - * Function assigns port to aggregator. - * - * @param bond_dev_private Pointer to bond_dev_private structure. - * @param port_pos Port to assign. - */ -static void -selection_logic(struct bond_dev_private *internals, uint8_t slave_id) -{ - struct port *agg, *port; - uint8_t slaves_count, new_agg_id, i; - uint8_t *slaves; - - slaves = internals->active_slaves; - slaves_count = internals->active_slave_count; - port = &mode_8023ad_ports[slave_id]; - - /* Search for aggregator suitable for this port */ - for (i = 0; i < slaves_count; ++i) { - agg = &mode_8023ad_ports[slaves[i]]; - /* Skip ports that are not aggreagators */ - if (agg->aggregator_port_id != slaves[i]) - continue; - - /* Actors system ID is not checked since all slave device have the same - * ID (MAC address). 
*/ - if ((agg->actor.key == port->actor.key && - agg->partner.system_priority == port->partner.system_priority && - is_same_ether_addr(&agg->partner.system, &port->partner.system) == 1 - && (agg->partner.key == port->partner.key)) && - is_zero_ether_addr(&port->partner.system) != 1 && - (agg->actor.key & - rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) != 0) { - - break; - } - } - - /* By default, port uses it self as agregator */ - if (i == slaves_count) - new_agg_id = slave_id; - else - new_agg_id = slaves[i]; - - if (new_agg_id != port->aggregator_port_id) { - port->aggregator_port_id = new_agg_id; - - MODE4_DEBUG("-> SELECTED: ID=%3u\n" - "\t%s aggregator ID=%3u\n", - port->aggregator_port_id, - port->aggregator_port_id == slave_id ? - "aggregator not found, using default" : "aggregator found", - port->aggregator_port_id); - } - - port->selected = SELECTED; -} - -/* Function maps DPDK speed to bonding speed stored in key field */ -static uint16_t -link_speed_key(uint16_t speed) { - uint16_t key_speed; - - switch (speed) { - case ETH_LINK_SPEED_AUTONEG: - key_speed = 0x00; - break; - case ETH_LINK_SPEED_10: - key_speed = BOND_LINK_SPEED_KEY_10M; - break; - case ETH_LINK_SPEED_100: - key_speed = BOND_LINK_SPEED_KEY_100M; - break; - case ETH_LINK_SPEED_1000: - key_speed = BOND_LINK_SPEED_KEY_1000M; - break; - case ETH_LINK_SPEED_10G: - key_speed = BOND_LINK_SPEED_KEY_10G; - break; - case ETH_LINK_SPEED_20G: - key_speed = BOND_LINK_SPEED_KEY_20G; - break; - case ETH_LINK_SPEED_40G: - key_speed = BOND_LINK_SPEED_KEY_40G; - break; - default: - /* Unknown speed*/ - key_speed = 0xFFFF; - } - - return key_speed; -} - -static void -bond_mode_8023ad_periodic_cb(void *arg) -{ - struct rte_eth_dev *bond_dev = arg; - struct bond_dev_private *internals = bond_dev->data->dev_private; - struct port *port; - struct rte_eth_link link_info; - struct ether_addr slave_addr; - - void *pkt = NULL; - uint8_t i, slave_id; - - - /* Update link status on each port */ - for (i = 0; i < internals->active_slave_count; i++) { - uint16_t key; - - slave_id = internals->active_slaves[i]; - rte_eth_link_get(slave_id, &link_info); - rte_eth_macaddr_get(slave_id, &slave_addr); - - if (link_info.link_status != 0) { - key = link_speed_key(link_info.link_speed) << 1; - if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX) - key |= BOND_LINK_FULL_DUPLEX_KEY; - } else - key = 0; - - port = &mode_8023ad_ports[slave_id]; - - key = rte_cpu_to_be_16(key); - if (key != port->actor.key) { - if (!(key & rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY))) - set_warning_flags(port, WRN_NOT_LACP_CAPABLE); - - port->actor.key = key; - SM_FLAG_SET(port, NTT); - } - - if (!is_same_ether_addr(&port->actor.system, &slave_addr)) { - ether_addr_copy(&slave_addr, &port->actor.system); - if (port->aggregator_port_id == slave_id) - SM_FLAG_SET(port, NTT); - } - } - - for (i = 0; i < internals->active_slave_count; i++) { - slave_id = internals->active_slaves[i]; - port = &mode_8023ad_ports[slave_id]; - - if ((port->actor.key & - rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) { - - SM_FLAG_SET(port, BEGIN); - - /* LACP is disabled on half duples or link is down */ - if (SM_FLAG(port, LACP_ENABLED)) { - /* If port was enabled set it to BEGIN state */ - SM_FLAG_CLR(port, LACP_ENABLED); - ACTOR_STATE_CLR(port, DISTRIBUTING); - ACTOR_STATE_CLR(port, COLLECTING); - } - - /* Skip this port processing */ - continue; - } - - SM_FLAG_SET(port, LACP_ENABLED); - - /* Find LACP packet to this port. 
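[Reviewer aid, not part of the patch] The periodic callback above rebuilds the actor key from the slave's link state: the speed key occupies the upper bits and bit 0 carries duplex, and the result is stored with rte_cpu_to_be_16() so it can be copied into LACPDUs directly. A tiny sketch of that composition in host order, using the BOND_LINK_* values from the private header in this patch (illustrative only):

#include <stdint.h>
#include <stdio.h>

#define LINK_FULL_DUPLEX_KEY 0x01   /* BOND_LINK_FULL_DUPLEX_KEY */
#define LINK_SPEED_KEY_10G   0x10   /* BOND_LINK_SPEED_KEY_10G   */

/* Compose the host-order actor key for a given speed key and duplex. */
static uint16_t
actor_key(uint16_t speed_key, int full_duplex)
{
    uint16_t key = (uint16_t)(speed_key << 1);

    if (full_duplex)
        key |= LINK_FULL_DUPLEX_KEY;
    return key;   /* the PMD stores rte_cpu_to_be_16(key) */
}

int
main(void)
{
    /* 10G full duplex: (0x10 << 1) | 0x01 = 0x0021 */
    printf("key = 0x%04x\n", actor_key(LINK_SPEED_KEY_10G, 1));
    return 0;
}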
Do not check subtype, it is done in - * function that queued packet */ - if (rte_ring_dequeue(port->rx_ring, &pkt) == 0) { - struct rte_mbuf *lacp_pkt = pkt; - struct lacpdu_header *lacp; - - lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *); - RTE_VERIFY(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP); - - /* This is LACP frame so pass it to rx_machine */ - rx_machine(internals, slave_id, &lacp->lacpdu); - rte_pktmbuf_free(lacp_pkt); - } else - rx_machine(internals, slave_id, NULL); - - periodic_machine(internals, slave_id); - mux_machine(internals, slave_id); - tx_machine(internals, slave_id); - selection_logic(internals, slave_id); - - SM_FLAG_CLR(port, BEGIN); - show_warnings(slave_id); - } - - rte_eal_alarm_set(internals->mode4.update_timeout_us, - bond_mode_8023ad_periodic_cb, arg); -} - -void -bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, uint8_t slave_id) -{ - struct bond_dev_private *internals = bond_dev->data->dev_private; - - struct port *port = &mode_8023ad_ports[slave_id]; - struct port_params initial = { - .system = { { 0 } }, - .system_priority = rte_cpu_to_be_16(0xFFFF), - .key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY), - .port_priority = rte_cpu_to_be_16(0x00FF), - .port_number = 0, - }; - - char mem_name[RTE_ETH_NAME_MAX_LEN]; - uint8_t socket_id; - unsigned element_size; - - /* Given slave mus not be in active list */ - RTE_VERIFY(find_slave_by_id(internals->active_slaves, - internals->active_slave_count, slave_id) == internals->active_slave_count); - - memcpy(&port->actor, &initial, sizeof(struct port_params)); - /* Standard requires that port ID must be grater than 0. - * Add 1 do get corresponding port_number */ - port->actor.port_number = rte_cpu_to_be_16((uint16_t)slave_id + 1); - - memcpy(&port->partner, &initial, sizeof(struct port_params)); - - /* default states */ - port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED; - port->partner_state = STATE_LACP_ACTIVE; - port->sm_flags = SM_FLAGS_BEGIN; - - /* use this port as agregator */ - port->aggregator_port_id = slave_id; - rte_eth_promiscuous_enable(slave_id); - - timer_cancel(&port->warning_timer); - - if (port->mbuf_pool != NULL) - return; - - RTE_VERIFY(port->rx_ring == NULL); - RTE_VERIFY(port->tx_ring == NULL); - socket_id = rte_eth_devices[slave_id].pci_dev->numa_node; - - element_size = sizeof(struct slow_protocol_frame) + sizeof(struct rte_mbuf) - + RTE_PKTMBUF_HEADROOM; - - /* How big memory pool should be? If driver will not - * free packets quick enough there will be ENOMEM in tx_machine. - * For now give 511 pkts * max number of queued TX packets per slave. - * Hope it will be enough. */ - snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_pool", slave_id); - port->mbuf_pool = rte_mempool_create(mem_name, - BOND_MODE_8023AX_SLAVE_TX_PKTS * 512 - 1, - element_size, - RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ? 32 : RTE_MEMPOOL_CACHE_MAX_SIZE, - sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, - NULL, rte_pktmbuf_init, NULL, socket_id, MEMPOOL_F_NO_SPREAD); - - /* Any memory allocation failure in initalization is critical because - * resources can't be free, so reinitialization is impossible. 
*/ - if (port->mbuf_pool == NULL) { - rte_panic("Slave %u: Failed to create memory pool '%s': %s\n", - slave_id, mem_name, rte_strerror(rte_errno)); - } - - snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_rx", slave_id); - port->rx_ring = rte_ring_create(mem_name, - rte_align32pow2(BOND_MODE_8023AX_SLAVE_RX_PKTS), socket_id, 0); - - if (port->rx_ring == NULL) { - rte_panic("Slave %u: Failed to create rx ring '%s': %s\n", slave_id, - mem_name, rte_strerror(rte_errno)); - } - - /* TX ring is at least one pkt longer to make room for marker packet. */ - snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_tx", slave_id); - port->tx_ring = rte_ring_create(mem_name, - rte_align32pow2(BOND_MODE_8023AX_SLAVE_TX_PKTS + 1), socket_id, 0); - - if (port->tx_ring == NULL) { - rte_panic("Slave %u: Failed to create tx ring '%s': %s\n", slave_id, - mem_name, rte_strerror(rte_errno)); - } -} - -int -bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev, - uint8_t slave_id) -{ - struct bond_dev_private *internals = bond_dev->data->dev_private; - void *pkt = NULL; - struct port *port; - uint8_t i; - - /* Given slave mus be in active list */ - RTE_VERIFY(find_slave_by_id(internals->active_slaves, - internals->active_slave_count, slave_id) < internals->active_slave_count); - - /* Exclude slave from transmit policy. If this slave is an aggregator - * make all aggregated slaves unselected to force sellection logic - * to select suitable aggregator for this port. */ - for (i = 0; i < internals->active_slave_count; i++) { - port = &mode_8023ad_ports[internals->active_slaves[i]]; - if (port->aggregator_port_id != slave_id) - continue; - - port->selected = UNSELECTED; - - /* Use default aggregator */ - port->aggregator_port_id = internals->active_slaves[i]; - } - - port = &mode_8023ad_ports[slave_id]; - port->selected = UNSELECTED; - port->actor_state &= ~(STATE_SYNCHRONIZATION | STATE_DISTRIBUTING | - STATE_COLLECTING); - - while (rte_ring_dequeue(port->rx_ring, &pkt) == 0) - rte_pktmbuf_free((struct rte_mbuf *)pkt); - - while (rte_ring_dequeue(port->tx_ring, &pkt) == 0) - rte_pktmbuf_free((struct rte_mbuf *)pkt); - return 0; -} - -void -bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev) -{ - struct bond_dev_private *internals = bond_dev->data->dev_private; - struct ether_addr slave_addr; - struct port *slave, *agg_slave; - uint8_t slave_id, i, j; - - bond_mode_8023ad_stop(bond_dev); - - for (i = 0; i < internals->active_slave_count; i++) { - slave_id = internals->active_slaves[i]; - slave = &mode_8023ad_ports[slave_id]; - rte_eth_macaddr_get(slave_id, &slave_addr); - - if (is_same_ether_addr(&slave_addr, &slave->actor.system)) - continue; - - ether_addr_copy(&slave_addr, &slave->actor.system); - /* Do nothing if this port is not an aggregator. In other case - * Set NTT flag on every port that use this aggregator. 
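[Reviewer aid, not part of the patch] bond_mode_8023ad_activate_slave() above sizes the per-slave resources defensively: the mbuf pool holds BOND_MODE_8023AX_SLAVE_TX_PKTS * 512 - 1 buffers (the "511 pkts" mentioned in the comment) and the rx/tx ring sizes are rounded up to powers of two, with the tx ring one entry longer to leave room for a marker response. A compact sketch of the same arithmetic with the constants from this patch plugged in (standalone C; the DPDK struct sizes that feed element_size are not reproduced here):

#include <stdint.h>
#include <stdio.h>

#define SLAVE_RX_PKTS 3   /* BOND_MODE_8023AX_SLAVE_RX_PKTS */
#define SLAVE_TX_PKTS 1   /* BOND_MODE_8023AX_SLAVE_TX_PKTS */

/* Round up to the next power of two, as rte_align32pow2() does. */
static uint32_t
align32pow2(uint32_t x)
{
    x--;
    x |= x >> 1;  x |= x >> 2;  x |= x >> 4;
    x |= x >> 8;  x |= x >> 16;
    return x + 1;
}

int
main(void)
{
    printf("mbuf pool size : %d\n", SLAVE_TX_PKTS * 512 - 1);        /* 511 */
    printf("rx ring size   : %u\n", align32pow2(SLAVE_RX_PKTS));     /* 4   */
    printf("tx ring size   : %u\n", align32pow2(SLAVE_TX_PKTS + 1)); /* 2   */
    return 0;
}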
*/ - if (slave->aggregator_port_id != slave_id) - continue; - - for (j = 0; j < internals->active_slave_count; j++) { - agg_slave = &mode_8023ad_ports[internals->active_slaves[j]]; - if (agg_slave->aggregator_port_id == slave_id) - SM_FLAG_SET(agg_slave, NTT); - } - } - - if (bond_dev->data->dev_started) - bond_mode_8023ad_start(bond_dev); -} - -void -bond_mode_8023ad_conf_get(struct rte_eth_dev *dev, - struct rte_eth_bond_8023ad_conf *conf) -{ - struct bond_dev_private *internals = dev->data->dev_private; - struct mode8023ad_private *mode4 = &internals->mode4; - uint64_t ms_ticks = rte_get_tsc_hz() / 1000; - - conf->fast_periodic_ms = mode4->fast_periodic_timeout / ms_ticks; - conf->slow_periodic_ms = mode4->slow_periodic_timeout / ms_ticks; - conf->short_timeout_ms = mode4->short_timeout / ms_ticks; - conf->long_timeout_ms = mode4->long_timeout / ms_ticks; - conf->aggregate_wait_timeout_ms = mode4->aggregate_wait_timeout / ms_ticks; - conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks; - conf->update_timeout_ms = mode4->update_timeout_us / 1000; -} - -void -bond_mode_8023ad_setup(struct rte_eth_dev *dev, - struct rte_eth_bond_8023ad_conf *conf) -{ - struct rte_eth_bond_8023ad_conf def_conf; - struct bond_dev_private *internals = dev->data->dev_private; - struct mode8023ad_private *mode4 = &internals->mode4; - uint64_t ms_ticks = rte_get_tsc_hz() / 1000; - - if (conf == NULL) { - conf = &def_conf; - conf->fast_periodic_ms = BOND_8023AD_FAST_PERIODIC_MS; - conf->slow_periodic_ms = BOND_8023AD_SLOW_PERIODIC_MS; - conf->short_timeout_ms = BOND_8023AD_SHORT_TIMEOUT_MS; - conf->long_timeout_ms = BOND_8023AD_LONG_TIMEOUT_MS; - conf->aggregate_wait_timeout_ms = BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS; - conf->tx_period_ms = BOND_8023AD_TX_MACHINE_PERIOD_MS; - conf->rx_marker_period_ms = BOND_8023AD_RX_MARKER_PERIOD_MS; - conf->update_timeout_ms = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS; - } - - mode4->fast_periodic_timeout = conf->fast_periodic_ms * ms_ticks; - mode4->slow_periodic_timeout = conf->slow_periodic_ms * ms_ticks; - mode4->short_timeout = conf->short_timeout_ms * ms_ticks; - mode4->long_timeout = conf->long_timeout_ms * ms_ticks; - mode4->aggregate_wait_timeout = conf->aggregate_wait_timeout_ms * ms_ticks; - mode4->tx_period_timeout = conf->tx_period_ms * ms_ticks; - mode4->rx_marker_timeout = conf->rx_marker_period_ms * ms_ticks; - mode4->update_timeout_us = conf->update_timeout_ms * 1000; -} - -int -bond_mode_8023ad_enable(struct rte_eth_dev *bond_dev) -{ - struct bond_dev_private *internals = bond_dev->data->dev_private; - uint8_t i; - - for (i = 0; i < internals->active_slave_count; i++) - bond_mode_8023ad_activate_slave(bond_dev, i); - - return 0; -} - -int -bond_mode_8023ad_start(struct rte_eth_dev *bond_dev) -{ - return rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000, - &bond_mode_8023ad_periodic_cb, bond_dev); -} - -void -bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev) -{ - rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev); -} - -void -bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals, - uint8_t slave_id, struct rte_mbuf *pkt) -{ - struct mode8023ad_private *mode4 = &internals->mode4; - struct port *port = &mode_8023ad_ports[slave_id]; - struct marker_header *m_hdr; - uint64_t marker_timer, old_marker_timer; - int retval; - uint8_t wrn, subtype; - /* If packet is a marker, we send response now by reusing given packet - * and update only source MAC, destination MAC is multicast so don't - * update it. 
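[Reviewer aid, not part of the patch] bond_mode_8023ad_setup() and bond_mode_8023ad_conf_get() above convert between the millisecond values in the public configuration and the TSC tick values used by the timer helpers, via ms_ticks = rte_get_tsc_hz() / 1000. A small sketch of that conversion using two of the default timeouts from this patch (the 2.0 GHz TSC frequency is an assumed example value, not something the code fixes):

#include <stdint.h>
#include <stdio.h>

#define FAST_PERIODIC_MS 900ULL     /* BOND_8023AD_FAST_PERIODIC_MS */
#define SLOW_PERIODIC_MS 29000ULL   /* BOND_8023AD_SLOW_PERIODIC_MS */

int
main(void)
{
    uint64_t tsc_hz = 2000000000ULL;    /* stand-in for rte_get_tsc_hz() */
    uint64_t ms_ticks = tsc_hz / 1000;  /* TSC ticks per millisecond */

    printf("fast periodic: %llu ticks\n",
           (unsigned long long)(FAST_PERIODIC_MS * ms_ticks));
    printf("slow periodic: %llu ticks\n",
           (unsigned long long)(SLOW_PERIODIC_MS * ms_ticks));
    return 0;
}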
Other frames will be handled later by state machines */ - subtype = rte_pktmbuf_mtod(pkt, - struct slow_protocol_frame *)->slow_protocol.subtype; - - if (subtype == SLOW_SUBTYPE_MARKER) { - m_hdr = rte_pktmbuf_mtod(pkt, struct marker_header *); - - if (likely(m_hdr->marker.tlv_type_marker != MARKER_TLV_TYPE_INFO)) { - wrn = WRN_UNKNOWN_MARKER_TYPE; - goto free_out; - } - - /* Setup marker timer. Do it in loop in case concurent access. */ - do { - old_marker_timer = port->rx_marker_timer; - if (!timer_is_expired(&old_marker_timer)) { - wrn = WRN_RX_MARKER_TO_FAST; - goto free_out; - } - - timer_set(&marker_timer, mode4->rx_marker_timeout); - retval = rte_atomic64_cmpset(&port->rx_marker_timer, - old_marker_timer, marker_timer); - } while (unlikely(retval == 0)); - - m_hdr->marker.tlv_type_marker = MARKER_TLV_TYPE_RESP; - rte_eth_macaddr_get(slave_id, &m_hdr->eth_hdr.s_addr); - - if (unlikely(rte_ring_enqueue(port->tx_ring, pkt) == -ENOBUFS)) { - /* reset timer */ - port->rx_marker_timer = 0; - wrn = WRN_TX_QUEUE_FULL; - goto free_out; - } - } else if (likely(subtype == SLOW_SUBTYPE_LACP)) { - if (unlikely(rte_ring_enqueue(port->rx_ring, pkt) == -ENOBUFS)) { - /* If RX fing full free lacpdu message and drop packet */ - wrn = WRN_RX_QUEUE_FULL; - goto free_out; - } - } else { - wrn = WRN_UNKNOWN_SLOW_TYPE; - goto free_out; - } - - return; - -free_out: - set_warning_flags(port, wrn); - rte_pktmbuf_free(pkt); -} - -int -rte_eth_bond_8023ad_conf_get(uint8_t port_id, - struct rte_eth_bond_8023ad_conf *conf) -{ - struct rte_eth_dev *bond_dev; - - if (valid_bonded_port_id(port_id) != 0) - return -EINVAL; - - if (conf == NULL) - return -EINVAL; - - bond_dev = &rte_eth_devices[port_id]; - bond_mode_8023ad_conf_get(bond_dev, conf); - return 0; -} - -int -rte_eth_bond_8023ad_setup(uint8_t port_id, - struct rte_eth_bond_8023ad_conf *conf) -{ - struct rte_eth_dev *bond_dev; - - if (valid_bonded_port_id(port_id) != 0) - return -EINVAL; - - if (conf != NULL) { - /* Basic sanity check */ - if (conf->slow_periodic_ms == 0 || - conf->fast_periodic_ms >= conf->slow_periodic_ms || - conf->long_timeout_ms == 0 || - conf->short_timeout_ms >= conf->long_timeout_ms || - conf->aggregate_wait_timeout_ms == 0 || - conf->tx_period_ms == 0 || - conf->rx_marker_period_ms == 0 || - conf->update_timeout_ms == 0) { - RTE_LOG(ERR, PMD, "given mode 4 configuration is invalid\n"); - return -EINVAL; - } - } - - bond_dev = &rte_eth_devices[port_id]; - bond_mode_8023ad_setup(bond_dev, conf); - - return 0; -} - -int -rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id, - struct rte_eth_bond_8023ad_slave_info *info) -{ - struct rte_eth_dev *bond_dev; - struct bond_dev_private *internals; - struct port *port; - - if (info == NULL || valid_bonded_port_id(port_id) != 0 || - rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD) - return -EINVAL; - - bond_dev = &rte_eth_devices[port_id]; - - internals = bond_dev->data->dev_private; - if (find_slave_by_id(internals->active_slaves, - internals->active_slave_count, slave_id) == - internals->active_slave_count) - return -EINVAL; - - port = &mode_8023ad_ports[slave_id]; - info->selected = port->selected; - - info->actor_state = port->actor_state; - rte_memcpy(&info->actor, &port->actor, sizeof(port->actor)); - - info->partner_state = port->partner_state; - rte_memcpy(&info->partner, &port->partner, sizeof(port->partner)); - - info->agg_port_id = port->aggregator_port_id; - return 0; -} diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.h 
b/lib/librte_pmd_bond/rte_eth_bond_8023ad.h deleted file mode 100644 index ebd0e93..0000000 --- a/lib/librte_pmd_bond/rte_eth_bond_8023ad.h +++ /dev/null @@ -1,222 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef RTE_ETH_BOND_8023AD_H_ -#define RTE_ETH_BOND_8023AD_H_ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Actor/partner states - */ -#define STATE_LACP_ACTIVE 0x01 -#define STATE_LACP_SHORT_TIMEOUT 0x02 -#define STATE_AGGREGATION 0x04 -#define STATE_SYNCHRONIZATION 0x08 -#define STATE_COLLECTING 0x10 -#define STATE_DISTRIBUTING 0x20 -/** Partners parameters are defaulted */ -#define STATE_DEFAULTED 0x40 -#define STATE_EXPIRED 0x80 - -#define TLV_TYPE_ACTOR_INFORMATION 0x01 -#define TLV_TYPE_PARTNER_INFORMATION 0x02 -#define TLV_TYPE_COLLECTOR_INFORMATION 0x03 -#define TLV_TYPE_TERMINATOR_INFORMATION 0x00 - -#define SLOW_SUBTYPE_LACP 0x01 -#define SLOW_SUBTYPE_MARKER 0x02 - -#define MARKER_TLV_TYPE_INFO 0x01 -#define MARKER_TLV_TYPE_RESP 0x02 - -enum rte_bond_8023ad_selection { - UNSELECTED, - STANDBY, - SELECTED -}; - -/** Generic slow protocol structure */ -struct slow_protocol { - uint8_t subtype; - uint8_t reserved_119[119]; -} __attribute__((__packed__)); - -/** Generic slow protocol frame type structure */ -struct slow_protocol_frame { - struct ether_hdr eth_hdr; - struct slow_protocol slow_protocol; -} __attribute__((__packed__)); - -struct port_params { - uint16_t system_priority; - /**< System priority (unused in current implementation) */ - struct ether_addr system; - /**< System ID - Slave MAC address, same as bonding MAC address */ - uint16_t key; - /**< Speed information (implementation dependednt) and duplex. */ - uint16_t port_priority; - /**< Priority of this (unused in current implementation) */ - uint16_t port_number; - /**< Port number. It corresponds to slave port id. 
*/ -} __attribute__((__packed__)); - -struct lacpdu_actor_partner_params { - uint8_t tlv_type_info; - uint8_t info_length; - struct port_params port_params; - uint8_t state; - uint8_t reserved_3[3]; -} __attribute__((__packed__)); - -/** LACPDU structure (5.4.2 in 802.1AX documentation). */ -struct lacpdu { - uint8_t subtype; - uint8_t version_number; - - struct lacpdu_actor_partner_params actor; - struct lacpdu_actor_partner_params partner; - - uint8_t tlv_type_collector_info; - uint8_t collector_info_length; - uint16_t collector_max_delay; - uint8_t reserved_12[12]; - - uint8_t tlv_type_terminator; - uint8_t terminator_length; - uint8_t reserved_50[50]; -} __attribute__((__packed__)); - -/** LACPDU frame: Contains ethernet header and LACPDU. */ -struct lacpdu_header { - struct ether_hdr eth_hdr; - struct lacpdu lacpdu; -} __attribute__((__packed__)); - -struct marker { - uint8_t subtype; - uint8_t version_number; - - uint8_t tlv_type_marker; - uint8_t info_length; - uint16_t requester_port; - struct ether_addr requester_system; - uint32_t requester_transaction_id; - uint8_t reserved_2[2]; - - uint8_t tlv_type_terminator; - uint8_t terminator_length; - uint8_t reserved_90[90]; -} __attribute__((__packed__)); - -struct marker_header { - struct ether_hdr eth_hdr; - struct marker marker; -} __attribute__((__packed__)); - -struct rte_eth_bond_8023ad_conf { - uint32_t fast_periodic_ms; - uint32_t slow_periodic_ms; - uint32_t short_timeout_ms; - uint32_t long_timeout_ms; - uint32_t aggregate_wait_timeout_ms; - uint32_t tx_period_ms; - uint32_t rx_marker_period_ms; - uint32_t update_timeout_ms; -}; - -struct rte_eth_bond_8023ad_slave_info { - enum rte_bond_8023ad_selection selected; - uint8_t actor_state; - struct port_params actor; - uint8_t partner_state; - struct port_params partner; - uint8_t agg_port_id; -}; - -/** - * @internal - * - * Function returns current configuration of 802.3AX mode. - * - * @param port_id Bonding device id - * @param conf Pointer to timeout structure. - * - * @return - * 0 - if ok - * -EINVAL if conf is NULL - */ -int -rte_eth_bond_8023ad_conf_get(uint8_t port_id, - struct rte_eth_bond_8023ad_conf *conf); - -/** - * @internal - * - * Function set new configuration of 802.3AX mode. - * - * @param port_id Bonding device id - * @param conf Configuration, if NULL set default configuration. - * @return - * 0 - if ok - * -EINVAL if configuration is invalid. - */ -int -rte_eth_bond_8023ad_setup(uint8_t port_id, - struct rte_eth_bond_8023ad_conf *conf); - -/** - * @internal - * - * Function returns current state of given slave device. - * - * @param slave_id Port id of valid slave. - * @param conf buffer for configuration - * @return - * 0 - if ok - * -EINVAL if conf is NULL or slave id is invalid (not a slave of given - * bonded device or is not inactive). - */ -int -rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id, - struct rte_eth_bond_8023ad_slave_info *conf); - -#ifdef __cplusplus -} -#endif - -#endif /* RTE_ETH_BOND_8023AD_H_ */ diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad_private.h b/lib/librte_pmd_bond/rte_eth_bond_8023ad_private.h deleted file mode 100644 index 8adee70..0000000 --- a/lib/librte_pmd_bond/rte_eth_bond_8023ad_private.h +++ /dev/null @@ -1,308 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. 
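[Reviewer aid, not part of the patch] The packed wire structures above add up to the 110-octet LACPDU body described in 802.1AX (14-byte port parameters, 20-byte actor/partner TLVs, 16-byte collector TLV, 52-byte terminator plus padding). A standalone C11 sketch that checks the arithmetic at compile time; the struct and field names are local stand-ins, not the driver's types:

#include <stdint.h>

struct mac6 { uint8_t addr[6]; } __attribute__((__packed__));

struct pparams {                    /* mirrors struct port_params: 14 bytes */
    uint16_t system_priority;
    struct mac6 system;
    uint16_t key;
    uint16_t port_priority;
    uint16_t port_number;
} __attribute__((__packed__));

struct ap_params {          /* mirrors lacpdu_actor_partner_params: 20 bytes */
    uint8_t tlv_type_info;
    uint8_t info_length;
    struct pparams port_params;
    uint8_t state;
    uint8_t reserved_3[3];
} __attribute__((__packed__));

struct lacpdu_body {                /* mirrors struct lacpdu: 110 bytes */
    uint8_t subtype, version_number;
    struct ap_params actor, partner;
    uint8_t tlv_type_collector_info, collector_info_length;
    uint16_t collector_max_delay;
    uint8_t reserved_12[12];
    uint8_t tlv_type_terminator, terminator_length;
    uint8_t reserved_50[50];
} __attribute__((__packed__));

_Static_assert(sizeof(struct pparams) == 14, "port params are 14 octets");
_Static_assert(sizeof(struct ap_params) == 20, "actor/partner TLV is 20 octets");
_Static_assert(sizeof(struct lacpdu_body) == 110, "LACPDU body is 110 octets");

int main(void) { return 0; }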
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef RTE_ETH_BOND_8023AD_PRIVATE_H_ -#define RTE_ETH_BOND_8023AD_PRIVATE_H_ - -#include - -#include -#include -#include - -#include "rte_eth_bond_8023ad.h" - -#define BOND_MODE_8023AX_UPDATE_TIMEOUT_MS 100 -/** Maximum number of packets to one slave queued in TX ring. */ -#define BOND_MODE_8023AX_SLAVE_RX_PKTS 3 -/** Maximum number of LACP packets from one slave queued in TX ring. */ -#define BOND_MODE_8023AX_SLAVE_TX_PKTS 1 -/** - * Timeouts deffinitions (5.4.4 in 802.1AX documentation). - */ -#define BOND_8023AD_FAST_PERIODIC_MS 900 -#define BOND_8023AD_SLOW_PERIODIC_MS 29000 -#define BOND_8023AD_SHORT_TIMEOUT_MS 3000 -#define BOND_8023AD_LONG_TIMEOUT_MS 90000 -#define BOND_8023AD_CHURN_DETECTION_TIMEOUT_MS 60000 -#define BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS 2000 -#define BOND_8023AD_TX_MACHINE_PERIOD_MS 500 -#define BOND_8023AD_RX_MARKER_PERIOD_MS 2000 - -/** - * Interval of showing warning message from state machines. All messages will - * be held (and gathered together) to prevent flooding. - * This is no parto of 802.1AX standard. 
- */ -#define BOND_8023AD_WARNINGS_PERIOD_MS 1000 - - - -/** - * State machine flags - */ -#define SM_FLAGS_BEGIN 0x0001 -#define SM_FLAGS_LACP_ENABLED 0x0002 -#define SM_FLAGS_ACTOR_CHURN 0x0004 -#define SM_FLAGS_PARTNER_CHURN 0x0008 -#define SM_FLAGS_MOVED 0x0100 -#define SM_FLAGS_PARTNER_SHORT_TIMEOUT 0x0200 -#define SM_FLAGS_NTT 0x0400 - -#define BOND_LINK_FULL_DUPLEX_KEY 0x01 -#define BOND_LINK_SPEED_KEY_10M 0x02 -#define BOND_LINK_SPEED_KEY_100M 0x04 -#define BOND_LINK_SPEED_KEY_1000M 0x08 -#define BOND_LINK_SPEED_KEY_10G 0x10 -#define BOND_LINK_SPEED_KEY_20G 0x11 -#define BOND_LINK_SPEED_KEY_40G 0x12 - -#define WRN_RX_MARKER_TO_FAST 0x01 -#define WRN_UNKNOWN_SLOW_TYPE 0x02 -#define WRN_UNKNOWN_MARKER_TYPE 0x04 -#define WRN_NOT_LACP_CAPABLE 0x08 -#define WRN_RX_QUEUE_FULL 0x10 -#define WRN_TX_QUEUE_FULL 0x20 - -#define CHECK_FLAGS(_variable, _f) ((_variable) & (_f)) -#define SET_FLAGS(_variable, _f) ((_variable) |= (_f)) -#define CLEAR_FLAGS(_variable, _f) ((_variable) &= ~(_f)) - -#define SM_FLAG(_p, _f) (!!CHECK_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f)) -#define SM_FLAG_SET(_p, _f) SET_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f) -#define SM_FLAG_CLR(_p, _f) CLEAR_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f) - -#define ACTOR_STATE(_p, _f) (!!CHECK_FLAGS((_p)->actor_state, STATE_ ## _f)) -#define ACTOR_STATE_SET(_p, _f) SET_FLAGS((_p)->actor_state, STATE_ ## _f) -#define ACTOR_STATE_CLR(_p, _f) CLEAR_FLAGS((_p)->actor_state, STATE_ ## _f) - -#define PARTNER_STATE(_p, _f) (!!CHECK_FLAGS((_p)->partner_state, STATE_ ## _f)) -#define PARTNER_STATE_SET(_p, _f) SET_FLAGS((_p)->partner_state, STATE_ ## _f) -#define PARTNER_STATE_CLR(_p, _f) CLEAR_FLAGS((_p)->partner_state, STATE_ ## _f) - -/** Variables associated with each port (5.4.7 in 802.1AX documentation). */ -struct port { - /** - * The operational values of the Actor's state parameters. Bitmask - * of port states. - */ - uint8_t actor_state; - - /** The operational Actor's port parameters */ - struct port_params actor; - - /** - * The operational value of the Actor's view of the current values of - * the Partner's state parameters. The Actor sets this variable either - * to the value received from the Partner in an LACPDU, or to the value - * of Partner_Admin_Port_State. Bitmask of port states. - */ - uint8_t partner_state; - - /** The operational Partner's port parameters */ - struct port_params partner; - - /* Additional port parameters not listed in documentation */ - /** State machine flags */ - uint16_t sm_flags; - enum rte_bond_8023ad_selection selected; - - uint64_t current_while_timer; - uint64_t periodic_timer; - uint64_t wait_while_timer; - uint64_t tx_machine_timer; - uint64_t tx_marker_timer; - /* Agregator parameters */ - /** Used aggregator port ID */ - uint16_t aggregator_port_id; - - /** Memory pool used to allocate rings */ - struct rte_mempool *mbuf_pool; - - /** Ring of LACP packets from RX burst function */ - struct rte_ring *rx_ring; - - /** Ring of slow protocol packets (LACP and MARKERS) to TX burst function */ - struct rte_ring *tx_ring; - - /** Timer which is also used as mutex. If is 0 (not running) RX marker - * packet might be responded. Otherwise shall be dropped. It is zeroed in - * mode 4 callback function after expire. 
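[Reviewer aid, not part of the patch] The flag helpers above are thin token-pasting wrappers: SM_FLAG(port, NTT) expands to a test of SM_FLAGS_NTT in port->sm_flags, and the ACTOR_STATE/PARTNER_STATE families do the same over the state bitmasks. A standalone illustration of the pattern using the same macro shapes and two of the same flag values, but a local struct:

#include <stdint.h>
#include <stdio.h>

#define SM_FLAGS_BEGIN 0x0001
#define SM_FLAGS_NTT   0x0400

#define CHECK_FLAGS(_v, _f) ((_v) & (_f))
#define SET_FLAGS(_v, _f)   ((_v) |= (_f))
#define CLEAR_FLAGS(_v, _f) ((_v) &= ~(_f))

/* Token pasting: SM_FLAG(p, NTT) tests SM_FLAGS_NTT in p->sm_flags. */
#define SM_FLAG(_p, _f)     (!!CHECK_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f))
#define SM_FLAG_SET(_p, _f) SET_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f)
#define SM_FLAG_CLR(_p, _f) CLEAR_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f)

struct demo_port { uint16_t sm_flags; };

int
main(void)
{
    struct demo_port p = { .sm_flags = 0 };

    SM_FLAG_SET(&p, NTT);
    printf("NTT=%d BEGIN=%d\n", SM_FLAG(&p, NTT), SM_FLAG(&p, BEGIN)); /* 1 0 */
    SM_FLAG_CLR(&p, NTT);
    printf("NTT=%d\n", SM_FLAG(&p, NTT));                              /* 0   */
    return 0;
}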
*/ - volatile uint64_t rx_marker_timer; - - uint64_t warning_timer; - volatile uint16_t warnings_to_show; -}; - -struct mode8023ad_private { - uint64_t fast_periodic_timeout; - uint64_t slow_periodic_timeout; - uint64_t short_timeout; - uint64_t long_timeout; - uint64_t aggregate_wait_timeout; - uint64_t tx_period_timeout; - uint64_t rx_marker_timeout; - uint64_t update_timeout_us; -}; - -/** - * @internal - * The pool of *port* structures. The size of the pool - * is configured at compile-time in the file. - */ -extern struct port mode_8023ad_ports[]; - -/* Forward declaration */ -struct bond_dev_private; - -/** - * @internal - * - * Get configuration of bonded interface. - * - * - * @param dev Bonded interface - * @param conf returned configuration - */ -void -bond_mode_8023ad_conf_get(struct rte_eth_dev *dev, - struct rte_eth_bond_8023ad_conf *conf); - -/** - * @internal - * - * Set mode 4 configuration of bonded interface. - * - * @pre Bonded interface must be stopped. - * - * @param dev Bonded interface - * @param conf new configuration. If NULL set default configuration. - */ -void -bond_mode_8023ad_setup(struct rte_eth_dev *dev, - struct rte_eth_bond_8023ad_conf *conf); - -/** - * @internal - * - * Enables 802.1AX mode and all active slaves on bonded interface. - * - * @param dev Bonded interface - * @return - * 0 on success, negative value otherwise. - */ -int -bond_mode_8023ad_enable(struct rte_eth_dev *dev); - -/** - * @internal - * - * Disables 802.1AX mode of the bonded interface and slaves. - * - * @param dev Bonded interface - * @return - * 0 on success, negative value otherwise. - */ -int bond_mode_8023ad_disable(struct rte_eth_dev *dev); - -/** - * @internal - * - * Starts 802.3AX state machines management logic. - * @param dev Bonded interface - * @return - * 0 if machines was started, 1 if machines was already running, - * negative value otherwise. - */ -int -bond_mode_8023ad_start(struct rte_eth_dev *dev); - -/** - * @internal - * - * Stops 802.3AX state machines management logic. - * @param dev Bonded interface - * @return - * 0 if this call stopped state machines, -ENOENT if alarm was not set. - */ -void -bond_mode_8023ad_stop(struct rte_eth_dev *dev); - -/** - * @internal - * - * Passes given slow packet to state machines management logic. - * @param internals Bonded device private data. - * @param slave_id Slave port id. - * @param slot_pkt Slow packet. - */ -void -bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals, - uint8_t slave_id, struct rte_mbuf *pkt); - -/** - * @internal - * - * Appends given slave used slave - * - * @param dev Bonded interface. - * @param port_id Slave port ID to be added - * - * @return - * 0 on success, negative value otherwise. - */ -void -bond_mode_8023ad_activate_slave(struct rte_eth_dev *dev, uint8_t port_id); - -/** - * @internal - * - * Denitializes and removes given slave from 802.1AX mode. - * - * @param dev Bonded interface. - * @param slave_num Position of slave in active_slaves array - * - * @return - * 0 on success, negative value otherwise. - */ -int -bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *dev, uint8_t slave_pos); - -/** - * Updates state when MAC was changed on bonded device or one of its slaves. 
- * @param bond_dev Bonded device - */ -void -bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev); - -#endif /* RTE_ETH_BOND_8023AD_H_ */ diff --git a/lib/librte_pmd_bond/rte_eth_bond_alb.c b/lib/librte_pmd_bond/rte_eth_bond_alb.c deleted file mode 100644 index 6df318e..0000000 --- a/lib/librte_pmd_bond/rte_eth_bond_alb.c +++ /dev/null @@ -1,287 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "rte_eth_bond_private.h" -#include "rte_eth_bond_alb.h" - -static inline uint8_t -simple_hash(uint8_t *hash_start, int hash_size) -{ - int i; - uint8_t hash; - - hash = 0; - for (i = 0; i < hash_size; ++i) - hash ^= hash_start[i]; - - return hash; -} - -static uint8_t -calculate_slave(struct bond_dev_private *internals) -{ - uint8_t idx; - - idx = (internals->mode6.last_slave + 1) % internals->active_slave_count; - internals->mode6.last_slave = idx; - return internals->active_slaves[idx]; -} - -int -bond_mode_alb_enable(struct rte_eth_dev *bond_dev) -{ - struct bond_dev_private *internals = bond_dev->data->dev_private; - struct client_data *hash_table = internals->mode6.client_table; - - uint16_t data_size; - char mem_name[RTE_ETH_NAME_MAX_LEN]; - int socket_id = bond_dev->pci_dev->numa_node; - - /* Fill hash table with initial values */ - memset(hash_table, 0, sizeof(struct client_data) * ALB_HASH_TABLE_SIZE); - rte_spinlock_init(&internals->mode6.lock); - internals->mode6.last_slave = ALB_NULL_INDEX; - internals->mode6.ntt = 0; - - /* Initialize memory pool for ARP packets to send */ - if (internals->mode6.mempool == NULL) { - /* - * 256 is size of ETH header, ARP header and nested VLAN headers. - * The value is chosen to be cache aligned. - */ - data_size = 256 + RTE_PKTMBUF_HEADROOM; - snprintf(mem_name, sizeof(mem_name), "%s_MODE6", bond_dev->data->name); - internals->mode6.mempool = rte_pktmbuf_pool_create(mem_name, - 512 * RTE_MAX_ETHPORTS, - RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ? 
- 32 : RTE_MEMPOOL_CACHE_MAX_SIZE, - 0, data_size, socket_id); - - if (internals->mode6.mempool == NULL) { - RTE_LOG(ERR, PMD, "%s: Failed to initialize ALB mempool.\n", - bond_dev->data->name); - rte_panic( - "Failed to allocate memory pool ('%s')\n" - "for bond device '%s'\n", - mem_name, bond_dev->data->name); - } - } - - return 0; -} - -void bond_mode_alb_arp_recv(struct ether_hdr *eth_h, uint16_t offset, - struct bond_dev_private *internals) { - struct arp_hdr *arp; - - struct client_data *hash_table = internals->mode6.client_table; - struct client_data *client_info; - - uint8_t hash_index; - - arp = (struct arp_hdr *) ((char *) (eth_h + 1) + offset); - - /* ARP Requests are forwarded to the application with no changes */ - if (arp->arp_op != rte_cpu_to_be_16(ARP_OP_REPLY)) - return; - - /* From now on, we analyze only ARP Reply packets */ - hash_index = simple_hash((uint8_t *) &arp->arp_data.arp_sip, - sizeof(arp->arp_data.arp_sip)); - client_info = &hash_table[hash_index]; - - /* - * We got reply for ARP Request send by the application. We need to - * update client table when received data differ from what is stored - * in ALB table and issue sending update packet to that slave. - */ - rte_spinlock_lock(&internals->mode6.lock); - if (client_info->in_use == 0 || - client_info->app_ip != arp->arp_data.arp_tip || - client_info->cli_ip != arp->arp_data.arp_sip || - !is_same_ether_addr(&client_info->cli_mac, &arp->arp_data.arp_sha) || - client_info->vlan_count != offset / sizeof(struct vlan_hdr) || - memcmp(client_info->vlan, eth_h + 1, offset) != 0 - ) { - client_info->in_use = 1; - client_info->app_ip = arp->arp_data.arp_tip; - client_info->cli_ip = arp->arp_data.arp_sip; - ether_addr_copy(&arp->arp_data.arp_sha, &client_info->cli_mac); - client_info->slave_idx = calculate_slave(internals); - rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac); - ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_tha); - memcpy(client_info->vlan, eth_h + 1, offset); - client_info->vlan_count = offset / sizeof(struct vlan_hdr); - } - internals->mode6.ntt = 1; - rte_spinlock_unlock(&internals->mode6.lock); -} - -uint8_t -bond_mode_alb_arp_xmit(struct ether_hdr *eth_h, uint16_t offset, - struct bond_dev_private *internals) -{ - struct arp_hdr *arp; - - struct client_data *hash_table = internals->mode6.client_table; - struct client_data *client_info; - - uint8_t hash_index; - - struct ether_addr bonding_mac; - - arp = (struct arp_hdr *)((char *)(eth_h + 1) + offset); - - /* - * Traffic with src MAC other than bonding should be sent on - * current primary port. 
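[Reviewer aid, not part of the patch] bond_mode_alb_arp_recv() above indexes the 256-entry client table by XOR-folding the client's IPv4 address with simple_hash(). A standalone sketch of that bucket selection; the folding is the same, the example address is arbitrary:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define HASH_TABLE_SIZE 256   /* ALB_HASH_TABLE_SIZE */

/* Same folding as simple_hash(): XOR of all bytes of the key. */
static uint8_t
simple_hash(const uint8_t *start, int size)
{
    uint8_t hash = 0;
    int i;

    for (i = 0; i < size; i++)
        hash ^= start[i];
    return hash;
}

int
main(void)
{
    uint32_t client_ip;
    uint8_t ip_bytes[4] = { 192, 168, 1, 42 };  /* example client address */

    memcpy(&client_ip, ip_bytes, sizeof(client_ip));
    /* 0xc0 ^ 0xa8 ^ 0x01 ^ 0x2a = 0x43, i.e. bucket 67 of 256 */
    printf("bucket %u of %u\n",
           simple_hash((uint8_t *)&client_ip, sizeof(client_ip)),
           HASH_TABLE_SIZE);
    return 0;
}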
- */ - rte_eth_macaddr_get(internals->port_id, &bonding_mac); - if (!is_same_ether_addr(&bonding_mac, &arp->arp_data.arp_sha)) { - rte_eth_macaddr_get(internals->current_primary_port, - &arp->arp_data.arp_sha); - return internals->current_primary_port; - } - - hash_index = simple_hash((uint8_t *)&arp->arp_data.arp_tip, - sizeof(uint32_t)); - client_info = &hash_table[hash_index]; - - rte_spinlock_lock(&internals->mode6.lock); - if (arp->arp_op == rte_cpu_to_be_16(ARP_OP_REPLY)) { - if (client_info->in_use) { - if (client_info->app_ip == arp->arp_data.arp_sip && - client_info->cli_ip == arp->arp_data.arp_tip) { - /* Entry is already assigned to this client */ - if (!is_broadcast_ether_addr(&arp->arp_data.arp_tha)) { - ether_addr_copy(&arp->arp_data.arp_tha, - &client_info->cli_mac); - } - rte_eth_macaddr_get(client_info->slave_idx, - &client_info->app_mac); - ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_sha); - memcpy(client_info->vlan, eth_h + 1, offset); - client_info->vlan_count = offset / sizeof(struct vlan_hdr); - rte_spinlock_unlock(&internals->mode6.lock); - return client_info->slave_idx; - } - } - - /* Assign new slave to this client and update src mac in ARP */ - client_info->in_use = 1; - client_info->ntt = 0; - client_info->app_ip = arp->arp_data.arp_sip; - ether_addr_copy(&arp->arp_data.arp_tha, &client_info->cli_mac); - client_info->cli_ip = arp->arp_data.arp_tip; - client_info->slave_idx = calculate_slave(internals); - rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac); - ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_sha); - memcpy(client_info->vlan, eth_h + 1, offset); - client_info->vlan_count = offset / sizeof(struct vlan_hdr); - rte_spinlock_unlock(&internals->mode6.lock); - return client_info->slave_idx; - } - - /* If packet is not ARP Reply, send it on current primary port. 
*/ - rte_spinlock_unlock(&internals->mode6.lock); - rte_eth_macaddr_get(internals->current_primary_port, - &arp->arp_data.arp_sha); - return internals->current_primary_port; -} - -uint8_t -bond_mode_alb_arp_upd(struct client_data *client_info, - struct rte_mbuf *pkt, struct bond_dev_private *internals) -{ - struct ether_hdr *eth_h; - struct arp_hdr *arp_h; - uint8_t slave_idx; - - rte_spinlock_lock(&internals->mode6.lock); - eth_h = rte_pktmbuf_mtod(pkt, struct ether_hdr *); - - ether_addr_copy(&client_info->app_mac, ð_h->s_addr); - ether_addr_copy(&client_info->cli_mac, ð_h->d_addr); - if (client_info->vlan_count > 0) - eth_h->ether_type = rte_cpu_to_be_16(ETHER_TYPE_VLAN); - else - eth_h->ether_type = rte_cpu_to_be_16(ETHER_TYPE_ARP); - - arp_h = (struct arp_hdr *)((char *)eth_h + sizeof(struct ether_hdr) - + client_info->vlan_count * sizeof(struct vlan_hdr)); - - memcpy(eth_h + 1, client_info->vlan, - client_info->vlan_count * sizeof(struct vlan_hdr)); - - ether_addr_copy(&client_info->app_mac, &arp_h->arp_data.arp_sha); - arp_h->arp_data.arp_sip = client_info->app_ip; - ether_addr_copy(&client_info->cli_mac, &arp_h->arp_data.arp_tha); - arp_h->arp_data.arp_tip = client_info->cli_ip; - - arp_h->arp_hrd = rte_cpu_to_be_16(ARP_HRD_ETHER); - arp_h->arp_pro = rte_cpu_to_be_16(ETHER_TYPE_IPv4); - arp_h->arp_hln = ETHER_ADDR_LEN; - arp_h->arp_pln = sizeof(uint32_t); - arp_h->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY); - - slave_idx = client_info->slave_idx; - rte_spinlock_unlock(&internals->mode6.lock); - - return slave_idx; -} - -void -bond_mode_alb_client_list_upd(struct rte_eth_dev *bond_dev) -{ - struct bond_dev_private *internals = bond_dev->data->dev_private; - struct client_data *client_info; - - int i; - - /* If active slave count is 0, it's pointless to refresh alb table */ - if (internals->active_slave_count <= 0) - return; - - rte_spinlock_lock(&internals->mode6.lock); - internals->mode6.last_slave = ALB_NULL_INDEX; - - for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) { - client_info = &internals->mode6.client_table[i]; - if (client_info->in_use) { - client_info->slave_idx = calculate_slave(internals); - rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac); - internals->mode6.ntt = 1; - } - } - rte_spinlock_unlock(&internals->mode6.lock); -} diff --git a/lib/librte_pmd_bond/rte_eth_bond_alb.h b/lib/librte_pmd_bond/rte_eth_bond_alb.h deleted file mode 100644 index fd7c3ae..0000000 --- a/lib/librte_pmd_bond/rte_eth_bond_alb.h +++ /dev/null @@ -1,142 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef RTE_ETH_BOND_ALB_H_ -#define RTE_ETH_BOND_ALB_H_ - -#include -#include - -#define ALB_HASH_TABLE_SIZE 256 -#define ALB_NULL_INDEX 0xFFFFFFFF - -struct client_data { - /** ARP data of single client */ - struct ether_addr app_mac; - /**< MAC address of application running DPDK */ - uint32_t app_ip; - /**< IP address of application running DPDK */ - struct ether_addr cli_mac; - /**< Client MAC address */ - uint32_t cli_ip; - /**< Client IP address */ - - uint8_t slave_idx; - /**< Index of slave on which we connect with that client */ - uint8_t in_use; - /**< Flag indicating if entry in client table is currently used */ - uint8_t ntt; - /**< Flag indicating if we need to send update to this client on next tx */ - - struct vlan_hdr vlan[2]; - /**< Content of vlan headers */ - uint8_t vlan_count; - /**< Number of nested vlan headers */ -}; - -struct mode_alb_private { - struct client_data client_table[ALB_HASH_TABLE_SIZE]; - /**< Hash table storing ARP data of every client connected */ - struct rte_mempool *mempool; - /**< Mempool for creating ARP update packets */ - uint8_t ntt; - /**< Flag indicating if we need to send update to any client on next tx */ - uint32_t last_slave; - /**< Index of last used slave in client table */ - rte_spinlock_t lock; -}; - -/** - * ALB mode initialization. - * - * @param bond_dev Pointer to bonding device. - * - * @return - * Error code - 0 on success. - */ -int -bond_mode_alb_enable(struct rte_eth_dev *bond_dev); - -/** - * Function handles ARP packet reception. If received ARP request, it is - * forwarded to application without changes. If it is ARP reply, client table - * is updated. - * - * @param eth_h ETH header of received packet. - * @param offset Vlan header offset. - * @param internals Bonding data. - */ -void -bond_mode_alb_arp_recv(struct ether_hdr *eth_h, uint16_t offset, - struct bond_dev_private *internals); - -/** - * Function handles ARP packet transmission. It also decides on which slave - * send that packet. If packet is ARP Request, it is send on primary slave. - * If it is ARP Reply, it is send on slave stored in client table for that - * connection. On Reply function also updates data in client table. - * - * @param eth_h ETH header of transmitted packet. - * @param offset Vlan header offset. - * @param internals Bonding data. - * - * @return - * Index of slave on which packet should be sent. - */ -uint8_t -bond_mode_alb_arp_xmit(struct ether_hdr *eth_h, uint16_t offset, - struct bond_dev_private *internals); - -/** - * Function fills packet with ARP data from client_info. - * - * @param client_info Data of client to which packet is sent. - * @param pkt Pointer to packet which is sent. - * @param internals Bonding data. 
- * - * @return - * Index of slawe on which packet should be sent. - */ -uint8_t -bond_mode_alb_arp_upd(struct client_data *client_info, - struct rte_mbuf *pkt, struct bond_dev_private *internals); - -/** - * Function updates slave indexes of active connections. - * - * @param bond_dev Pointer to bonded device struct. - */ -void -bond_mode_alb_client_list_upd(struct rte_eth_dev *bond_dev); - -#endif /* RTE_ETH_BOND_ALB_H_ */ diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c b/lib/librte_pmd_bond/rte_eth_bond_api.c deleted file mode 100644 index e91a623..0000000 --- a/lib/librte_pmd_bond/rte_eth_bond_api.c +++ /dev/null @@ -1,840 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include - -#include -#include -#include -#include - -#include "rte_eth_bond.h" -#include "rte_eth_bond_private.h" -#include "rte_eth_bond_8023ad_private.h" - -#define DEFAULT_POLLING_INTERVAL_10_MS (10) - -int -valid_bonded_ethdev(struct rte_eth_dev *eth_dev) -{ - size_t len; - - /* Check valid pointer */ - if (eth_dev->driver->pci_drv.name == NULL || driver_name == NULL) - return -1; - - /* Check string lengths are equal */ - len = strlen(driver_name); - if (strlen(eth_dev->driver->pci_drv.name) != len) - return -1; - - /* Compare strings */ - return strncmp(eth_dev->driver->pci_drv.name, driver_name, len); -} - -int -valid_port_id(uint8_t port_id) -{ - /* Verify that port id is valid */ - int ethdev_count = rte_eth_dev_count(); - if (port_id >= ethdev_count) { - RTE_BOND_LOG(ERR, "Port Id %d is greater than rte_eth_dev_count %d", - port_id, ethdev_count); - return -1; - } - - return 0; -} - -int -valid_bonded_port_id(uint8_t port_id) -{ - /* Verify that port id's are valid */ - if (valid_port_id(port_id)) - return -1; - - /* Verify that bonded_port_id refers to a bonded port */ - if (valid_bonded_ethdev(&rte_eth_devices[port_id])) { - RTE_BOND_LOG(ERR, "Specified port Id %d is not a bonded eth_dev device", - port_id); - return -1; - } - - return 0; -} - -int -valid_slave_port_id(uint8_t port_id) -{ - /* Verify that port id's are valid */ - if (valid_port_id(port_id)) - return -1; - - /* Verify that port_id refers to a non bonded port */ - if (!valid_bonded_ethdev(&rte_eth_devices[port_id])) - return -1; - - return 0; -} - -void -activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id) -{ - struct bond_dev_private *internals = eth_dev->data->dev_private; - uint8_t active_count = internals->active_slave_count; - - if (internals->mode == BONDING_MODE_8023AD) - bond_mode_8023ad_activate_slave(eth_dev, port_id); - - if (internals->mode == BONDING_MODE_TLB - || internals->mode == BONDING_MODE_ALB) { - - internals->tlb_slaves_order[active_count] = port_id; - } - - RTE_VERIFY(internals->active_slave_count < - (RTE_DIM(internals->active_slaves) - 1)); - - internals->active_slaves[internals->active_slave_count] = port_id; - internals->active_slave_count++; - - if (internals->mode == BONDING_MODE_TLB) - bond_tlb_activate_slave(internals); - if (internals->mode == BONDING_MODE_ALB) - bond_mode_alb_client_list_upd(eth_dev); -} - -void -deactivate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id) -{ - uint8_t slave_pos; - struct bond_dev_private *internals = eth_dev->data->dev_private; - uint8_t active_count = internals->active_slave_count; - - if (internals->mode == BONDING_MODE_8023AD) { - bond_mode_8023ad_stop(eth_dev); - bond_mode_8023ad_deactivate_slave(eth_dev, port_id); - } else if (internals->mode == BONDING_MODE_TLB - || internals->mode == BONDING_MODE_ALB) - bond_tlb_disable(internals); - - slave_pos = find_slave_by_id(internals->active_slaves, active_count, - port_id); - - /* If slave was not at the end of the list - * shift active slaves up active array list */ - if (slave_pos < active_count) { - active_count--; - memmove(internals->active_slaves + slave_pos, - internals->active_slaves + slave_pos + 1, - (active_count - slave_pos) * - sizeof(internals->active_slaves[0])); - } - - RTE_VERIFY(active_count < RTE_DIM(internals->active_slaves)); - internals->active_slave_count = active_count; - - if (eth_dev->data->dev_started) { - if (internals->mode == BONDING_MODE_8023AD) { - bond_mode_8023ad_start(eth_dev); - } else if (internals->mode == BONDING_MODE_TLB) { - 
bond_tlb_enable(internals); - } else if (internals->mode == BONDING_MODE_ALB) { - bond_tlb_enable(internals); - bond_mode_alb_client_list_upd(eth_dev); - } - } -} - -uint8_t -number_of_sockets(void) -{ - int sockets = 0; - int i; - const struct rte_memseg *ms = rte_eal_get_physmem_layout(); - - for (i = 0; ((i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL)); i++) { - if (sockets < ms[i].socket_id) - sockets = ms[i].socket_id; - } - - /* Number of sockets = maximum socket_id + 1 */ - return ++sockets; -} - -const char *driver_name = "Link Bonding PMD"; - -int -rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) -{ - struct rte_pci_device *pci_dev = NULL; - struct bond_dev_private *internals = NULL; - struct rte_eth_dev *eth_dev = NULL; - struct eth_driver *eth_drv = NULL; - struct rte_pci_driver *pci_drv = NULL; - struct rte_pci_id *pci_id_table = NULL; - /* now do all data allocation - for eth_dev structure, dummy pci driver - * and internal (private) data - */ - - if (name == NULL) { - RTE_BOND_LOG(ERR, "Invalid name specified"); - goto err; - } - - if (socket_id >= number_of_sockets()) { - RTE_BOND_LOG(ERR, - "Invalid socket id specified to create bonded device on."); - goto err; - } - - pci_dev = rte_zmalloc_socket(name, sizeof(*pci_dev), 0, socket_id); - if (pci_dev == NULL) { - RTE_BOND_LOG(ERR, "Unable to malloc pci dev on socket"); - goto err; - } - - eth_drv = rte_zmalloc_socket(name, sizeof(*eth_drv), 0, socket_id); - if (eth_drv == NULL) { - RTE_BOND_LOG(ERR, "Unable to malloc eth_drv on socket"); - goto err; - } - - pci_drv = ð_drv->pci_drv; - - pci_id_table = rte_zmalloc_socket(name, sizeof(*pci_id_table), 0, socket_id); - if (pci_id_table == NULL) { - RTE_BOND_LOG(ERR, "Unable to malloc pci_id_table on socket"); - goto err; - } - pci_id_table->device_id = PCI_ANY_ID; - pci_id_table->subsystem_device_id = PCI_ANY_ID; - pci_id_table->vendor_id = PCI_ANY_ID; - pci_id_table->subsystem_vendor_id = PCI_ANY_ID; - - pci_drv->id_table = pci_id_table; - pci_drv->drv_flags = RTE_PCI_DRV_INTR_LSC; - - internals = rte_zmalloc_socket(name, sizeof(*internals), 0, socket_id); - if (internals == NULL) { - RTE_BOND_LOG(ERR, "Unable to malloc internals on socket"); - goto err; - } - - /* reserve an ethdev entry */ - eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); - if (eth_dev == NULL) { - RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev"); - goto err; - } - - pci_dev->numa_node = socket_id; - pci_drv->name = driver_name; - - eth_dev->driver = eth_drv; - eth_dev->data->dev_private = internals; - eth_dev->data->nb_rx_queues = (uint16_t)1; - eth_dev->data->nb_tx_queues = (uint16_t)1; - - TAILQ_INIT(&(eth_dev->link_intr_cbs)); - - eth_dev->data->dev_link.link_status = 0; - - eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0, - socket_id); - - eth_dev->data->dev_started = 0; - eth_dev->data->promiscuous = 0; - eth_dev->data->scattered_rx = 0; - eth_dev->data->all_multicast = 0; - - eth_dev->dev_ops = &default_dev_ops; - eth_dev->pci_dev = pci_dev; - - rte_spinlock_init(&internals->lock); - - internals->port_id = eth_dev->data->port_id; - internals->mode = BONDING_MODE_INVALID; - internals->current_primary_port = 0; - internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2; - internals->xmit_hash = xmit_l2_hash; - internals->user_defined_mac = 0; - internals->link_props_set = 0; - - internals->link_status_polling_enabled = 0; - - internals->link_status_polling_interval_ms = DEFAULT_POLLING_INTERVAL_10_MS; - internals->link_down_delay_ms = 0; - 
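[Reviewer aid, not part of the patch] number_of_sockets() above derives the socket count as the highest socket_id present in the physical memory layout plus one. A tiny standalone equivalent over a plain array of socket ids (illustrative; the real code walks rte_eal_get_physmem_layout() until it hits a NULL memseg address):

#include <stdio.h>

/* Highest socket id + 1, as number_of_sockets() computes it. */
static int
count_sockets(const int *socket_ids, int n)
{
    int max = 0, i;

    for (i = 0; i < n; i++)
        if (socket_ids[i] > max)
            max = socket_ids[i];
    return max + 1;
}

int
main(void)
{
    int ids[] = { 0, 0, 1, 1, 0 };   /* memsegs spread over two sockets */

    printf("%d sockets\n", count_sockets(ids, 5));   /* prints 2 */
    return 0;
}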
internals->link_up_delay_ms = 0; - - internals->slave_count = 0; - internals->active_slave_count = 0; - internals->rx_offload_capa = 0; - internals->tx_offload_capa = 0; - - memset(internals->active_slaves, 0, sizeof(internals->active_slaves)); - memset(internals->slaves, 0, sizeof(internals->slaves)); - - /* Set mode 4 default configuration */ - bond_mode_8023ad_setup(eth_dev, NULL); - if (bond_ethdev_mode_set(eth_dev, mode)) { - RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d", - eth_dev->data->port_id, mode); - goto err; - } - - return eth_dev->data->port_id; - -err: - rte_free(pci_dev); - rte_free(pci_id_table); - rte_free(eth_drv); - rte_free(internals); - - return -1; -} - -static int -__eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id) -{ - struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev; - struct bond_dev_private *internals; - struct bond_dev_private *temp_internals; - struct rte_eth_link link_props; - struct rte_eth_dev_info dev_info; - - int i, j; - - if (valid_slave_port_id(slave_port_id) != 0) - return -1; - - bonded_eth_dev = &rte_eth_devices[bonded_port_id]; - internals = bonded_eth_dev->data->dev_private; - - /* Verify that new slave device is not already a slave of another - * bonded device */ - for (i = rte_eth_dev_count()-1; i >= 0; i--) { - if (valid_bonded_ethdev(&rte_eth_devices[i]) == 0) { - temp_internals = rte_eth_devices[i].data->dev_private; - - for (j = 0; j < temp_internals->slave_count; j++) { - /* Device already a slave of a bonded device */ - if (temp_internals->slaves[j].port_id == slave_port_id) { - RTE_BOND_LOG(ERR, "Slave port %d is already a slave", - slave_port_id); - return -1; - } - } - } - } - - slave_eth_dev = &rte_eth_devices[slave_port_id]; - - /* Add slave details to bonded device */ - slave_add(internals, slave_eth_dev); - - memset(&dev_info, 0, sizeof(dev_info)); - rte_eth_dev_info_get(slave_port_id, &dev_info); - - if (internals->slave_count < 1) { - /* if MAC is not user defined then use MAC of first slave add to - * bonded device */ - if (!internals->user_defined_mac) - mac_address_set(bonded_eth_dev, slave_eth_dev->data->mac_addrs); - - /* Inherit eth dev link properties from first slave */ - link_properties_set(bonded_eth_dev, - &(slave_eth_dev->data->dev_link)); - - /* Make primary slave */ - internals->primary_port = slave_port_id; - - /* Take the first dev's offload capabilities */ - internals->rx_offload_capa = dev_info.rx_offload_capa; - internals->tx_offload_capa = dev_info.tx_offload_capa; - - } else { - /* Check slave link properties are supported if props are set, - * all slaves must be the same */ - if (internals->link_props_set) { - if (link_properties_valid(&(bonded_eth_dev->data->dev_link), - &(slave_eth_dev->data->dev_link))) { - RTE_BOND_LOG(ERR, - "Slave port %d link speed/duplex not supported", - slave_port_id); - return -1; - } - } else { - link_properties_set(bonded_eth_dev, - &(slave_eth_dev->data->dev_link)); - } - internals->rx_offload_capa &= dev_info.rx_offload_capa; - internals->tx_offload_capa &= dev_info.tx_offload_capa; - } - - internals->slave_count++; - - /* Update all slave devices MACs*/ - mac_address_slaves_update(bonded_eth_dev); - - if (bonded_eth_dev->data->dev_started) { - if (slave_configure(bonded_eth_dev, slave_eth_dev) != 0) { - RTE_BOND_LOG(ERR, "rte_bond_slaves_configure: port=%d", - slave_port_id); - return -1; - } - } - - /* Register link status change callback with bonded device pointer as - * argument*/ - rte_eth_dev_callback_register(slave_port_id, 
RTE_ETH_EVENT_INTR_LSC, - bond_ethdev_lsc_event_callback, &bonded_eth_dev->data->port_id); - - /* If bonded device is started then we can add the slave to our active - * slave array */ - if (bonded_eth_dev->data->dev_started) { - rte_eth_link_get_nowait(slave_port_id, &link_props); - - if (link_props.link_status == 1) - activate_slave(bonded_eth_dev, slave_port_id); - } - return 0; - -} - -int -rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id) -{ - struct rte_eth_dev *bonded_eth_dev; - struct bond_dev_private *internals; - - int retval; - - /* Verify that port id's are valid bonded and slave ports */ - if (valid_bonded_port_id(bonded_port_id) != 0) - return -1; - - bonded_eth_dev = &rte_eth_devices[bonded_port_id]; - internals = bonded_eth_dev->data->dev_private; - - rte_spinlock_lock(&internals->lock); - - retval = __eth_bond_slave_add_lock_free(bonded_port_id, slave_port_id); - - rte_spinlock_unlock(&internals->lock); - - return retval; -} - -static int -__eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id) -{ - struct rte_eth_dev *bonded_eth_dev; - struct bond_dev_private *internals; - - int i, slave_idx; - - if (valid_slave_port_id(slave_port_id) != 0) - return -1; - - bonded_eth_dev = &rte_eth_devices[bonded_port_id]; - internals = bonded_eth_dev->data->dev_private; - - /* first remove from active slave list */ - slave_idx = find_slave_by_id(internals->active_slaves, - internals->active_slave_count, slave_port_id); - - if (slave_idx < internals->active_slave_count) - deactivate_slave(bonded_eth_dev, slave_port_id); - - slave_idx = -1; - /* now find in slave list */ - for (i = 0; i < internals->slave_count; i++) - if (internals->slaves[i].port_id == slave_port_id) { - slave_idx = i; - break; - } - - if (slave_idx < 0) { - RTE_BOND_LOG(ERR, "Couldn't find slave in port list, slave count %d", - internals->slave_count); - return -1; - } - - /* Un-register link status change callback with bonded device pointer as - * argument*/ - rte_eth_dev_callback_unregister(slave_port_id, RTE_ETH_EVENT_INTR_LSC, - bond_ethdev_lsc_event_callback, - &rte_eth_devices[bonded_port_id].data->port_id); - - /* Restore original MAC address of slave device */ - mac_address_set(&rte_eth_devices[slave_port_id], - &(internals->slaves[slave_idx].persisted_mac_addr)); - - slave_remove(internals, &rte_eth_devices[slave_port_id]); - - /* first slave in the active list will be the primary by default, - * otherwise use first device in list */ - if (internals->current_primary_port == slave_port_id) { - if (internals->active_slave_count > 0) - internals->current_primary_port = internals->active_slaves[0]; - else if (internals->slave_count > 0) - internals->current_primary_port = internals->slaves[0].port_id; - else - internals->primary_port = 0; - } - - if (internals->active_slave_count < 1) { - /* reset device link properties as no slaves are active */ - link_properties_reset(&rte_eth_devices[bonded_port_id]); - - /* if no slaves are any longer attached to bonded device and MAC is not - * user defined then clear MAC of bonded device as it will be reset - * when a new slave is added */ - if (internals->slave_count < 1 && !internals->user_defined_mac) - memset(rte_eth_devices[bonded_port_id].data->mac_addrs, 0, - sizeof(*(rte_eth_devices[bonded_port_id].data->mac_addrs))); - } - if (internals->slave_count == 0) { - internals->rx_offload_capa = 0; - internals->tx_offload_capa = 0; - } - return 0; -} - -int -rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id) 
-{ - struct rte_eth_dev *bonded_eth_dev; - struct bond_dev_private *internals; - int retval; - - if (valid_bonded_port_id(bonded_port_id) != 0) - return -1; - - bonded_eth_dev = &rte_eth_devices[bonded_port_id]; - internals = bonded_eth_dev->data->dev_private; - - rte_spinlock_lock(&internals->lock); - - retval = __eth_bond_slave_remove_lock_free(bonded_port_id, slave_port_id); - - rte_spinlock_unlock(&internals->lock); - - return retval; -} - -int -rte_eth_bond_mode_set(uint8_t bonded_port_id, uint8_t mode) -{ - if (valid_bonded_port_id(bonded_port_id) != 0) - return -1; - - return bond_ethdev_mode_set(&rte_eth_devices[bonded_port_id], mode); -} - -int -rte_eth_bond_mode_get(uint8_t bonded_port_id) -{ - struct bond_dev_private *internals; - - if (valid_bonded_port_id(bonded_port_id) != 0) - return -1; - - internals = rte_eth_devices[bonded_port_id].data->dev_private; - - return internals->mode; -} - -int -rte_eth_bond_primary_set(uint8_t bonded_port_id, uint8_t slave_port_id) -{ - struct bond_dev_private *internals; - - if (valid_bonded_port_id(bonded_port_id) != 0) - return -1; - - if (valid_slave_port_id(slave_port_id) != 0) - return -1; - - internals = rte_eth_devices[bonded_port_id].data->dev_private; - - internals->user_defined_primary_port = 1; - internals->primary_port = slave_port_id; - - bond_ethdev_primary_set(internals, slave_port_id); - - return 0; -} - -int -rte_eth_bond_primary_get(uint8_t bonded_port_id) -{ - struct bond_dev_private *internals; - - if (valid_bonded_port_id(bonded_port_id) != 0) - return -1; - - internals = rte_eth_devices[bonded_port_id].data->dev_private; - - if (internals->slave_count < 1) - return -1; - - return internals->current_primary_port; -} - -int -rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len) -{ - struct bond_dev_private *internals; - uint8_t i; - - if (valid_bonded_port_id(bonded_port_id) != 0) - return -1; - - if (slaves == NULL) - return -1; - - internals = rte_eth_devices[bonded_port_id].data->dev_private; - - if (internals->slave_count > len) - return -1; - - for (i = 0; i < internals->slave_count; i++) - slaves[i] = internals->slaves[i].port_id; - - return internals->slave_count; -} - -int -rte_eth_bond_active_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], - uint8_t len) -{ - struct bond_dev_private *internals; - - if (valid_bonded_port_id(bonded_port_id) != 0) - return -1; - - if (slaves == NULL) - return -1; - - internals = rte_eth_devices[bonded_port_id].data->dev_private; - - if (internals->active_slave_count > len) - return -1; - - memcpy(slaves, internals->active_slaves, internals->active_slave_count); - - return internals->active_slave_count; -} - -int -rte_eth_bond_mac_address_set(uint8_t bonded_port_id, - struct ether_addr *mac_addr) -{ - struct rte_eth_dev *bonded_eth_dev; - struct bond_dev_private *internals; - - if (valid_bonded_port_id(bonded_port_id) != 0) - return -1; - - bonded_eth_dev = &rte_eth_devices[bonded_port_id]; - internals = bonded_eth_dev->data->dev_private; - - /* Set MAC Address of Bonded Device */ - if (mac_address_set(bonded_eth_dev, mac_addr)) - return -1; - - internals->user_defined_mac = 1; - - /* Update all slave devices MACs*/ - if (internals->slave_count > 0) - return mac_address_slaves_update(bonded_eth_dev); - - return 0; -} - -int -rte_eth_bond_mac_address_reset(uint8_t bonded_port_id) -{ - struct rte_eth_dev *bonded_eth_dev; - struct bond_dev_private *internals; - - if (valid_bonded_port_id(bonded_port_id) != 0) - return -1; - - bonded_eth_dev = 
&rte_eth_devices[bonded_port_id]; - internals = bonded_eth_dev->data->dev_private; - - internals->user_defined_mac = 0; - - if (internals->slave_count > 0) { - /* Set MAC Address of Bonded Device */ - if (mac_address_set(bonded_eth_dev, - &internals->slaves[internals->primary_port].persisted_mac_addr) - != 0) { - RTE_BOND_LOG(ERR, "Failed to set MAC address on bonded device"); - return -1; - } - /* Update all slave devices MAC addresses */ - return mac_address_slaves_update(bonded_eth_dev); - } - /* No need to update anything as no slaves present */ - return 0; -} - -int -rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy) -{ - struct bond_dev_private *internals; - - if (valid_bonded_port_id(bonded_port_id) != 0) - return -1; - - internals = rte_eth_devices[bonded_port_id].data->dev_private; - - switch (policy) { - case BALANCE_XMIT_POLICY_LAYER2: - internals->balance_xmit_policy = policy; - internals->xmit_hash = xmit_l2_hash; - break; - case BALANCE_XMIT_POLICY_LAYER23: - internals->balance_xmit_policy = policy; - internals->xmit_hash = xmit_l23_hash; - break; - case BALANCE_XMIT_POLICY_LAYER34: - internals->balance_xmit_policy = policy; - internals->xmit_hash = xmit_l34_hash; - break; - - default: - return -1; - } - return 0; -} - -int -rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id) -{ - struct bond_dev_private *internals; - - if (valid_bonded_port_id(bonded_port_id) != 0) - return -1; - - internals = rte_eth_devices[bonded_port_id].data->dev_private; - - return internals->balance_xmit_policy; -} - -int -rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms) -{ - struct bond_dev_private *internals; - - if (valid_bonded_port_id(bonded_port_id) != 0) - return -1; - - internals = rte_eth_devices[bonded_port_id].data->dev_private; - internals->link_status_polling_interval_ms = internal_ms; - - return 0; -} - -int -rte_eth_bond_link_monitoring_get(uint8_t bonded_port_id) -{ - struct bond_dev_private *internals; - - if (valid_bonded_port_id(bonded_port_id) != 0) - return -1; - - internals = rte_eth_devices[bonded_port_id].data->dev_private; - - return internals->link_status_polling_interval_ms; -} - -int -rte_eth_bond_link_down_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms) - -{ - struct bond_dev_private *internals; - - if (valid_bonded_port_id(bonded_port_id) != 0) - return -1; - - internals = rte_eth_devices[bonded_port_id].data->dev_private; - internals->link_down_delay_ms = delay_ms; - - return 0; -} - -int -rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id) -{ - struct bond_dev_private *internals; - - if (valid_bonded_port_id(bonded_port_id) != 0) - return -1; - - internals = rte_eth_devices[bonded_port_id].data->dev_private; - - return internals->link_down_delay_ms; -} - -int -rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms) - -{ - struct bond_dev_private *internals; - - if (valid_bonded_port_id(bonded_port_id) != 0) - return -1; - - internals = rte_eth_devices[bonded_port_id].data->dev_private; - internals->link_up_delay_ms = delay_ms; - - return 0; -} - -int -rte_eth_bond_link_up_prop_delay_get(uint8_t bonded_port_id) -{ - struct bond_dev_private *internals; - - if (valid_bonded_port_id(bonded_port_id) != 0) - return -1; - - internals = rte_eth_devices[bonded_port_id].data->dev_private; - - return internals->link_up_delay_ms; -} diff --git a/lib/librte_pmd_bond/rte_eth_bond_args.c b/lib/librte_pmd_bond/rte_eth_bond_args.c deleted file mode 100644 index 02ecde6..0000000 --- 
a/lib/librte_pmd_bond/rte_eth_bond_args.c +++ /dev/null @@ -1,278 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include - -#include -#include - -#include "rte_eth_bond.h" -#include "rte_eth_bond_private.h" - -const char *pmd_bond_init_valid_arguments[] = { - PMD_BOND_SLAVE_PORT_KVARG, - PMD_BOND_PRIMARY_SLAVE_KVARG, - PMD_BOND_MODE_KVARG, - PMD_BOND_XMIT_POLICY_KVARG, - PMD_BOND_SOCKET_ID_KVARG, - PMD_BOND_MAC_ADDR_KVARG, - - NULL -}; - -static inline int -find_port_id_by_pci_addr(const struct rte_pci_addr *pci_addr) -{ - struct rte_pci_addr *eth_pci_addr; - unsigned i; - - for (i = 0; i < rte_eth_dev_count(); i++) { - - if (rte_eth_devices[i].pci_dev == NULL) - continue; - - eth_pci_addr = &(rte_eth_devices[i].pci_dev->addr); - - if (pci_addr->bus == eth_pci_addr->bus && - pci_addr->devid == eth_pci_addr->devid && - pci_addr->domain == eth_pci_addr->domain && - pci_addr->function == eth_pci_addr->function) - return i; - } - return -1; -} - -static inline int -find_port_id_by_dev_name(const char *name) -{ - unsigned i; - - for (i = 0; i < rte_eth_dev_count(); i++) { - if (rte_eth_devices[i].data == NULL) - continue; - - if (strcmp(rte_eth_devices[i].data->name, name) == 0) - return i; - } - return -1; -} - -/** - * Parses a port identifier string to a port id by pci address, then by name, - * and finally port id. 
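 * For example (values illustrative): a PCI address such as "0000:02:00.0"
 * is resolved through eal_parse_pci_DomBDF(), a device name such as
 * "eth_ring0" is matched against rte_eth_devices[].data->name, and a bare
 * number such as "1" is taken as the port id itself via strtol().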
- */ -static inline int -parse_port_id(const char *port_str) -{ - struct rte_pci_addr dev_addr; - int port_id; - - /* try parsing as pci address, physical devices */ - if (eal_parse_pci_DomBDF(port_str, &dev_addr) == 0) { - port_id = find_port_id_by_pci_addr(&dev_addr); - if (port_id < 0) - return -1; - } else { - /* try parsing as device name, virtual devices */ - port_id = find_port_id_by_dev_name(port_str); - if (port_id < 0) { - char *end; - errno = 0; - - /* try parsing as port id */ - port_id = strtol(port_str, &end, 10); - if (*end != 0 || errno != 0) - return -1; - } - } - - if (port_id < 0 || port_id > RTE_MAX_ETHPORTS) { - RTE_BOND_LOG(ERR, "Slave port specified (%s) outside expected range", - port_str); - return -1; - } - return port_id; -} - -int -bond_ethdev_parse_slave_port_kvarg(const char *key __rte_unused, - const char *value, void *extra_args) -{ - struct bond_ethdev_slave_ports *slave_ports; - - if (value == NULL || extra_args == NULL) - return -1; - - slave_ports = extra_args; - - if (strcmp(key, PMD_BOND_SLAVE_PORT_KVARG) == 0) { - int port_id = parse_port_id(value); - if (port_id < 0) { - RTE_BOND_LOG(ERR, "Invalid slave port value (%s) specified", value); - return -1; - } else - slave_ports->slaves[slave_ports->slave_count++] = - (uint8_t)port_id; - } - return 0; -} - -int -bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused, - const char *value, void *extra_args) -{ - uint8_t *mode; - char *endptr; - - if (value == NULL || extra_args == NULL) - return -1; - - mode = extra_args; - - errno = 0; - *mode = strtol(value, &endptr, 10); - if (*endptr != 0 || errno != 0) - return -1; - - /* validate mode value */ - switch (*mode) { - case BONDING_MODE_ROUND_ROBIN: - case BONDING_MODE_ACTIVE_BACKUP: - case BONDING_MODE_BALANCE: - case BONDING_MODE_BROADCAST: - case BONDING_MODE_8023AD: - case BONDING_MODE_TLB: - case BONDING_MODE_ALB: - return 0; - default: - RTE_BOND_LOG(ERR, "Invalid slave mode value (%s) specified", value); - return -1; - } -} - -int -bond_ethdev_parse_socket_id_kvarg(const char *key __rte_unused, - const char *value, void *extra_args) -{ - int socket_id; - char *endptr; - - if (value == NULL || extra_args == NULL) - return -1; - - errno = 0; - socket_id = (uint8_t)strtol(value, &endptr, 10); - if (*endptr != 0 || errno != 0) - return -1; - - /* validate mode value */ - if (socket_id >= 0 && socket_id < number_of_sockets()) { - *(uint8_t *)extra_args = (uint8_t)socket_id; - return 0; - } - return -1; -} - -int -bond_ethdev_parse_primary_slave_port_id_kvarg(const char *key __rte_unused, - const char *value, void *extra_args) -{ - int primary_slave_port_id; - - if (value == NULL || extra_args == NULL) - return -1; - - primary_slave_port_id = parse_port_id(value); - if (primary_slave_port_id < 0) - return -1; - - *(uint8_t *)extra_args = (uint8_t)primary_slave_port_id; - - return 0; -} - -int -bond_ethdev_parse_balance_xmit_policy_kvarg(const char *key __rte_unused, - const char *value, void *extra_args) -{ - uint8_t *xmit_policy; - - if (value == NULL || extra_args == NULL) - return -1; - - xmit_policy = extra_args; - - if (strcmp(PMD_BOND_XMIT_POLICY_LAYER2_KVARG, value) == 0) - *xmit_policy = BALANCE_XMIT_POLICY_LAYER2; - else if (strcmp(PMD_BOND_XMIT_POLICY_LAYER23_KVARG, value) == 0) - *xmit_policy = BALANCE_XMIT_POLICY_LAYER23; - else if (strcmp(PMD_BOND_XMIT_POLICY_LAYER34_KVARG, value) == 0) - *xmit_policy = BALANCE_XMIT_POLICY_LAYER34; - else - return -1; - - return 0; -} - -int -bond_ethdev_parse_bond_mac_addr_kvarg(const char *key 
__rte_unused, - const char *value, void *extra_args) -{ - if (value == NULL || extra_args == NULL) - return -1; - - /* Parse MAC */ - return cmdline_parse_etheraddr(NULL, value, extra_args, - sizeof(struct ether_addr)); -} - -int -bond_ethdev_parse_time_ms_kvarg(const char *key __rte_unused, - const char *value, void *extra_args) -{ - uint32_t time_ms; - char *endptr; - - if (value == NULL || extra_args == NULL) - return -1; - - errno = 0; - time_ms = (uint32_t)strtol(value, &endptr, 10); - if (*endptr != 0 || errno != 0) - return -1; - - *(uint32_t *)extra_args = time_ms; - - return 0; -} diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c b/lib/librte_pmd_bond/rte_eth_bond_pmd.c deleted file mode 100644 index c937e6b..0000000 --- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c +++ /dev/null @@ -1,2269 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "rte_eth_bond.h" -#include "rte_eth_bond_private.h" -#include "rte_eth_bond_8023ad_private.h" - -#define REORDER_PERIOD_MS 10 - -#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port) - -/* Table for statistics in mode 5 TLB */ -static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS]; - -static inline size_t -get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto) -{ - size_t vlan_offset = 0; - - if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) { - struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1); - - vlan_offset = sizeof(struct vlan_hdr); - *proto = vlan_hdr->eth_proto; - - if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) { - vlan_hdr = vlan_hdr + 1; - *proto = vlan_hdr->eth_proto; - vlan_offset += sizeof(struct vlan_hdr); - } - } - return vlan_offset; -} - -static uint16_t -bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) -{ - struct bond_dev_private *internals; - - uint16_t num_rx_slave = 0; - uint16_t num_rx_total = 0; - - int i; - - /* Cast to structure, containing bonded device's port id and queue id */ - struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; - - internals = bd_rx_q->dev_private; - - - for (i = 0; i < internals->active_slave_count && nb_pkts; i++) { - /* Offset of pointer to *bufs increases as packets are received - * from other slaves */ - num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i], - bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts); - if (num_rx_slave) { - num_rx_total += num_rx_slave; - nb_pkts -= num_rx_slave; - } - } - - return num_rx_total; -} - -static uint16_t -bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs, - uint16_t nb_pkts) -{ - struct bond_dev_private *internals; - - /* Cast to structure, containing bonded device's port id and queue id */ - struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; - - internals = bd_rx_q->dev_private; - - return rte_eth_rx_burst(internals->current_primary_port, - bd_rx_q->queue_id, bufs, nb_pkts); -} - -static uint16_t -bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, - uint16_t nb_pkts) -{ - /* Cast to structure, containing bonded device's port id and queue id */ - struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; - struct bond_dev_private *internals = bd_rx_q->dev_private; - struct ether_addr bond_mac; - - struct ether_hdr *hdr; - - const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW); - uint16_t num_rx_total = 0; /* Total number of received packets */ - uint8_t slaves[RTE_MAX_ETHPORTS]; - uint8_t slave_count; - - uint8_t collecting; /* current slave collecting status */ - const uint8_t promisc = internals->promiscuous_en; - uint8_t i, j, k; - - rte_eth_macaddr_get(internals->port_id, &bond_mac); - /* Copy slave list to protect against slave up/down changes during tx - * bursting */ - slave_count = internals->active_slave_count; - memcpy(slaves, internals->active_slaves, - sizeof(internals->active_slaves[0]) * slave_count); - - for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) { - j = num_rx_total; - collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING); - - /* Read packets from this slave */ - num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id, - &bufs[num_rx_total], nb_pkts - num_rx_total); - - for (k = j; k < 2 && k < num_rx_total; k++) - rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *)); - - /* 
Handle slow protocol packets. */ - while (j < num_rx_total) { - if (j + 3 < num_rx_total) - rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *)); - - hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *); - /* Remove packet from array if it is slow packet or slave is not - * in collecting state or bondign interface is not in promiscus - * mode and packet address does not match. */ - if (unlikely(hdr->ether_type == ether_type_slow_be || - !collecting || (!promisc && - !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) { - - if (hdr->ether_type == ether_type_slow_be) { - bond_mode_8023ad_handle_slow_pkt(internals, slaves[i], - bufs[j]); - } else - rte_pktmbuf_free(bufs[j]); - - /* Packet is managed by mode 4 or dropped, shift the array */ - num_rx_total--; - if (j < num_rx_total) { - memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) * - (num_rx_total - j)); - } - } else - j++; - } - } - - return num_rx_total; -} - -#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) -uint32_t burstnumberRX; -uint32_t burstnumberTX; - -#ifdef RTE_LIBRTE_BOND_DEBUG_ALB - -static void -arp_op_name(uint16_t arp_op, char *buf) -{ - switch (arp_op) { - case ARP_OP_REQUEST: - snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request"); - return; - case ARP_OP_REPLY: - snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply"); - return; - case ARP_OP_REVREQUEST: - snprintf(buf, sizeof("Reverse ARP Request"), "%s", - "Reverse ARP Request"); - return; - case ARP_OP_REVREPLY: - snprintf(buf, sizeof("Reverse ARP Reply"), "%s", - "Reverse ARP Reply"); - return; - case ARP_OP_INVREQUEST: - snprintf(buf, sizeof("Peer Identify Request"), "%s", - "Peer Identify Request"); - return; - case ARP_OP_INVREPLY: - snprintf(buf, sizeof("Peer Identify Reply"), "%s", - "Peer Identify Reply"); - return; - default: - break; - } - snprintf(buf, sizeof("Unknown"), "%s", "Unknown"); - return; -} -#endif -#define MaxIPv4String 16 -static void -ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size) -{ - uint32_t ipv4_addr; - - ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr); - snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF, - (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF, - ipv4_addr & 0xFF); -} - -#define MAX_CLIENTS_NUMBER 128 -uint8_t active_clients; -struct client_stats_t { - uint8_t port; - uint32_t ipv4_addr; - uint32_t ipv4_rx_packets; - uint32_t ipv4_tx_packets; -}; -struct client_stats_t client_stats[MAX_CLIENTS_NUMBER]; - -static void -update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator) -{ - int i = 0; - - for (; i < MAX_CLIENTS_NUMBER; i++) { - if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) { - /* Just update RX packets number for this client */ - if (TXorRXindicator == &burstnumberRX) - client_stats[i].ipv4_rx_packets++; - else - client_stats[i].ipv4_tx_packets++; - return; - } - } - /* We have a new client. 
Insert him to the table, and increment stats */ - if (TXorRXindicator == &burstnumberRX) - client_stats[active_clients].ipv4_rx_packets++; - else - client_stats[active_clients].ipv4_tx_packets++; - client_stats[active_clients].ipv4_addr = addr; - client_stats[active_clients].port = port; - active_clients++; - -} - -#ifdef RTE_LIBRTE_BOND_DEBUG_ALB -#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \ - RTE_LOG(DEBUG, PMD, \ - "%s " \ - "port:%d " \ - "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \ - "SrcIP:%s " \ - "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \ - "DstIP:%s " \ - "%s " \ - "%d\n", \ - info, \ - port, \ - eth_h->s_addr.addr_bytes[0], \ - eth_h->s_addr.addr_bytes[1], \ - eth_h->s_addr.addr_bytes[2], \ - eth_h->s_addr.addr_bytes[3], \ - eth_h->s_addr.addr_bytes[4], \ - eth_h->s_addr.addr_bytes[5], \ - src_ip, \ - eth_h->d_addr.addr_bytes[0], \ - eth_h->d_addr.addr_bytes[1], \ - eth_h->d_addr.addr_bytes[2], \ - eth_h->d_addr.addr_bytes[3], \ - eth_h->d_addr.addr_bytes[4], \ - eth_h->d_addr.addr_bytes[5], \ - dst_ip, \ - arp_op, \ - ++burstnumber) -#endif - -static void -mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h, - uint8_t port, uint32_t __attribute__((unused)) *burstnumber) -{ - struct ipv4_hdr *ipv4_h; -#ifdef RTE_LIBRTE_BOND_DEBUG_ALB - struct arp_hdr *arp_h; - char dst_ip[16]; - char ArpOp[24]; - char buf[16]; -#endif - char src_ip[16]; - - uint16_t ether_type = eth_h->ether_type; - uint16_t offset = get_vlan_offset(eth_h, ðer_type); - -#ifdef RTE_LIBRTE_BOND_DEBUG_ALB - snprintf(buf, 16, "%s", info); -#endif - - if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) { - ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset); - ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String); -#ifdef RTE_LIBRTE_BOND_DEBUG_ALB - ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String); - MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber); -#endif - update_client_stats(ipv4_h->src_addr, port, burstnumber); - } -#ifdef RTE_LIBRTE_BOND_DEBUG_ALB - else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) { - arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset); - ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String); - ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String); - arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp); - MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber); - } -#endif -} -#endif - -static uint16_t -bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) -{ - struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; - struct bond_dev_private *internals = bd_tx_q->dev_private; - struct ether_hdr *eth_h; - uint16_t ether_type, offset; - uint16_t nb_recv_pkts; - int i; - - nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts); - - for (i = 0; i < nb_recv_pkts; i++) { - eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *); - ether_type = eth_h->ether_type; - offset = get_vlan_offset(eth_h, ðer_type); - - if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) { -#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) - mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX); -#endif - bond_mode_alb_arp_recv(eth_h, offset, internals); - } -#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) - else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) - mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX); -#endif - } - - return nb_recv_pkts; -} - -static uint16_t 
-bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs, - uint16_t nb_pkts) -{ - struct bond_dev_private *internals; - struct bond_tx_queue *bd_tx_q; - - struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts]; - uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; - - uint8_t num_of_slaves; - uint8_t slaves[RTE_MAX_ETHPORTS]; - - uint16_t num_tx_total = 0, num_tx_slave; - - static int slave_idx = 0; - int i, cslave_idx = 0, tx_fail_total = 0; - - bd_tx_q = (struct bond_tx_queue *)queue; - internals = bd_tx_q->dev_private; - - /* Copy slave list to protect against slave up/down changes during tx - * bursting */ - num_of_slaves = internals->active_slave_count; - memcpy(slaves, internals->active_slaves, - sizeof(internals->active_slaves[0]) * num_of_slaves); - - if (num_of_slaves < 1) - return num_tx_total; - - /* Populate slaves mbuf with which packets are to be sent on it */ - for (i = 0; i < nb_pkts; i++) { - cslave_idx = (slave_idx + i) % num_of_slaves; - slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i]; - } - - /* increment current slave index so the next call to tx burst starts on the - * next slave */ - slave_idx = ++cslave_idx; - - /* Send packet burst on each slave device */ - for (i = 0; i < num_of_slaves; i++) { - if (slave_nb_pkts[i] > 0) { - num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, - slave_bufs[i], slave_nb_pkts[i]); - - /* if tx burst fails move packets to end of bufs */ - if (unlikely(num_tx_slave < slave_nb_pkts[i])) { - int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave; - - tx_fail_total += tx_fail_slave; - - memcpy(&bufs[nb_pkts - tx_fail_total], - &slave_bufs[i][num_tx_slave], - tx_fail_slave * sizeof(bufs[0])); - } - num_tx_total += num_tx_slave; - } - } - - return num_tx_total; -} - -static uint16_t -bond_ethdev_tx_burst_active_backup(void *queue, - struct rte_mbuf **bufs, uint16_t nb_pkts) -{ - struct bond_dev_private *internals; - struct bond_tx_queue *bd_tx_q; - - bd_tx_q = (struct bond_tx_queue *)queue; - internals = bd_tx_q->dev_private; - - if (internals->active_slave_count < 1) - return 0; - - return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id, - bufs, nb_pkts); -} - -static inline uint16_t -ether_hash(struct ether_hdr *eth_hdr) -{ - uint16_t *word_src_addr = (uint16_t *)eth_hdr->s_addr.addr_bytes; - uint16_t *word_dst_addr = (uint16_t *)eth_hdr->d_addr.addr_bytes; - - return (word_src_addr[0] ^ word_dst_addr[0]) ^ - (word_src_addr[1] ^ word_dst_addr[1]) ^ - (word_src_addr[2] ^ word_dst_addr[2]); -} - -static inline uint32_t -ipv4_hash(struct ipv4_hdr *ipv4_hdr) -{ - return (ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr); -} - -static inline uint32_t -ipv6_hash(struct ipv6_hdr *ipv6_hdr) -{ - uint32_t *word_src_addr = (uint32_t *)&(ipv6_hdr->src_addr[0]); - uint32_t *word_dst_addr = (uint32_t *)&(ipv6_hdr->dst_addr[0]); - - return (word_src_addr[0] ^ word_dst_addr[0]) ^ - (word_src_addr[1] ^ word_dst_addr[1]) ^ - (word_src_addr[2] ^ word_dst_addr[2]) ^ - (word_src_addr[3] ^ word_dst_addr[3]); -} - -uint16_t -xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count) -{ - struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *); - - uint32_t hash = ether_hash(eth_hdr); - - return (hash ^= hash >> 8) % slave_count; -} - -uint16_t -xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count) -{ - struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *); - uint16_t proto = eth_hdr->ether_type; - size_t vlan_offset = get_vlan_offset(eth_hdr, &proto); - uint32_t hash, 
l3hash = 0; - - hash = ether_hash(eth_hdr); - - if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) { - struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) - ((char *)(eth_hdr + 1) + vlan_offset); - l3hash = ipv4_hash(ipv4_hdr); - - } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) { - struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) - ((char *)(eth_hdr + 1) + vlan_offset); - l3hash = ipv6_hash(ipv6_hdr); - } - - hash = hash ^ l3hash; - hash ^= hash >> 16; - hash ^= hash >> 8; - - return hash % slave_count; -} - -uint16_t -xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count) -{ - struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *); - uint16_t proto = eth_hdr->ether_type; - size_t vlan_offset = get_vlan_offset(eth_hdr, &proto); - - struct udp_hdr *udp_hdr = NULL; - struct tcp_hdr *tcp_hdr = NULL; - uint32_t hash, l3hash = 0, l4hash = 0; - - if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) { - struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) - ((char *)(eth_hdr + 1) + vlan_offset); - size_t ip_hdr_offset; - - l3hash = ipv4_hash(ipv4_hdr); - - ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) * - IPV4_IHL_MULTIPLIER; - - if (ipv4_hdr->next_proto_id == IPPROTO_TCP) { - tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + - ip_hdr_offset); - l4hash = HASH_L4_PORTS(tcp_hdr); - } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) { - udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr + - ip_hdr_offset); - l4hash = HASH_L4_PORTS(udp_hdr); - } - } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) { - struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) - ((char *)(eth_hdr + 1) + vlan_offset); - l3hash = ipv6_hash(ipv6_hdr); - - if (ipv6_hdr->proto == IPPROTO_TCP) { - tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1); - l4hash = HASH_L4_PORTS(tcp_hdr); - } else if (ipv6_hdr->proto == IPPROTO_UDP) { - udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1); - l4hash = HASH_L4_PORTS(udp_hdr); - } - } - - hash = l3hash ^ l4hash; - hash ^= hash >> 16; - hash ^= hash >> 8; - - return hash % slave_count; -} - -struct bwg_slave { - uint64_t bwg_left_int; - uint64_t bwg_left_remainder; - uint8_t slave; -}; - -void -bond_tlb_activate_slave(struct bond_dev_private *internals) { - int i; - - for (i = 0; i < internals->active_slave_count; i++) { - tlb_last_obytets[internals->active_slaves[i]] = 0; - } -} - -static int -bandwidth_cmp(const void *a, const void *b) -{ - const struct bwg_slave *bwg_a = a; - const struct bwg_slave *bwg_b = b; - int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int; - int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder - - (int64_t)bwg_a->bwg_left_remainder; - if (diff > 0) - return 1; - else if (diff < 0) - return -1; - else if (diff2 > 0) - return 1; - else if (diff2 < 0) - return -1; - else - return 0; -} - -static void -bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx, - struct bwg_slave *bwg_slave) -{ - struct rte_eth_link link_status; - - rte_eth_link_get(port_id, &link_status); - uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8; - if (link_bwg == 0) - return; - link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS; - bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg; - bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg; -} - -static void -bond_ethdev_update_tlb_slave_cb(void *arg) -{ - struct bond_dev_private *internals = arg; - struct rte_eth_stats slave_stats; - struct bwg_slave bwg_array[RTE_MAX_ETHPORTS]; - uint8_t slave_count; - uint64_t tx_bytes; - - uint8_t update_stats = 0; - uint8_t i, slave_id; 
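	/*
	 * In outline: every REORDER_PERIOD_MS this callback samples each active
	 * slave's transmitted byte count, estimates the transmit headroom left
	 * on that slave with bandwidth_left(), sorts the slaves by headroom
	 * using bandwidth_cmp()/qsort(), publishes the order in
	 * internals->tlb_slaves_order for the TLB transmit path to consume, and
	 * finally re-arms itself through rte_eal_alarm_set().
	 */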
- - internals->slave_update_idx++; - - - if (internals->slave_update_idx >= REORDER_PERIOD_MS) - update_stats = 1; - - for (i = 0; i < internals->active_slave_count; i++) { - slave_id = internals->active_slaves[i]; - rte_eth_stats_get(slave_id, &slave_stats); - tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id]; - bandwidth_left(slave_id, tx_bytes, - internals->slave_update_idx, &bwg_array[i]); - bwg_array[i].slave = slave_id; - - if (update_stats) { - tlb_last_obytets[slave_id] = slave_stats.obytes; - } - } - - if (update_stats == 1) - internals->slave_update_idx = 0; - - slave_count = i; - qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp); - for (i = 0; i < slave_count; i++) - internals->tlb_slaves_order[i] = bwg_array[i].slave; - - rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb, - (struct bond_dev_private *)internals); -} - -static uint16_t -bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) -{ - struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; - struct bond_dev_private *internals = bd_tx_q->dev_private; - - struct rte_eth_dev *primary_port = - &rte_eth_devices[internals->primary_port]; - uint16_t num_tx_total = 0; - uint8_t i, j; - - uint8_t num_of_slaves = internals->active_slave_count; - uint8_t slaves[RTE_MAX_ETHPORTS]; - - struct ether_hdr *ether_hdr; - struct ether_addr primary_slave_addr; - struct ether_addr active_slave_addr; - - if (num_of_slaves < 1) - return num_tx_total; - - memcpy(slaves, internals->tlb_slaves_order, - sizeof(internals->tlb_slaves_order[0]) * num_of_slaves); - - - ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr); - - if (nb_pkts > 3) { - for (i = 0; i < 3; i++) - rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*)); - } - - for (i = 0; i < num_of_slaves; i++) { - rte_eth_macaddr_get(slaves[i], &active_slave_addr); - for (j = num_tx_total; j < nb_pkts; j++) { - if (j + 3 < nb_pkts) - rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*)); - - ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *); - if (is_same_ether_addr(ðer_hdr->s_addr, &primary_slave_addr)) - ether_addr_copy(&active_slave_addr, ðer_hdr->s_addr); -#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) - mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX); -#endif - } - - num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, - bufs + num_tx_total, nb_pkts - num_tx_total); - - if (num_tx_total == nb_pkts) - break; - } - - return num_tx_total; -} - -void -bond_tlb_disable(struct bond_dev_private *internals) -{ - rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals); -} - -void -bond_tlb_enable(struct bond_dev_private *internals) -{ - bond_ethdev_update_tlb_slave_cb(internals); -} - -static uint16_t -bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) -{ - struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; - struct bond_dev_private *internals = bd_tx_q->dev_private; - - struct ether_hdr *eth_h; - uint16_t ether_type, offset; - - struct client_data *client_info; - - /* - * We create transmit buffers for every slave and one additional to send - * through tlb. In worst case every packet will be send on one port. - */ - struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts]; - uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 }; - - /* - * We create separate transmit buffers for update packets as they wont be - * counted in num_tx_total. 
- */ - struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE]; - uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 }; - - struct rte_mbuf *upd_pkt; - size_t pkt_size; - - uint16_t num_send, num_not_send = 0; - uint16_t num_tx_total = 0; - uint8_t slave_idx; - - int i, j; - - /* Search tx buffer for ARP packets and forward them to alb */ - for (i = 0; i < nb_pkts; i++) { - eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *); - ether_type = eth_h->ether_type; - offset = get_vlan_offset(eth_h, ðer_type); - - if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) { - slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals); - - /* Change src mac in eth header */ - rte_eth_macaddr_get(slave_idx, ð_h->s_addr); - - /* Add packet to slave tx buffer */ - slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i]; - slave_bufs_pkts[slave_idx]++; - } else { - /* If packet is not ARP, send it with TLB policy */ - slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] = - bufs[i]; - slave_bufs_pkts[RTE_MAX_ETHPORTS]++; - } - } - - /* Update connected client ARP tables */ - if (internals->mode6.ntt) { - for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) { - client_info = &internals->mode6.client_table[i]; - - if (client_info->in_use) { - /* Allocate new packet to send ARP update on current slave */ - upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool); - if (upd_pkt == NULL) { - RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n"); - continue; - } - pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr) - + client_info->vlan_count * sizeof(struct vlan_hdr); - upd_pkt->data_len = pkt_size; - upd_pkt->pkt_len = pkt_size; - - slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt, - internals); - - /* Add packet to update tx buffer */ - update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt; - update_bufs_pkts[slave_idx]++; - } - } - internals->mode6.ntt = 0; - } - - /* Send ARP packets on proper slaves */ - for (i = 0; i < RTE_MAX_ETHPORTS; i++) { - if (slave_bufs_pkts[i] > 0) { - num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, - slave_bufs[i], slave_bufs_pkts[i]); - for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) { - bufs[nb_pkts - 1 - num_not_send - j] = - slave_bufs[i][nb_pkts - 1 - j]; - } - - num_tx_total += num_send; - num_not_send += slave_bufs_pkts[i] - num_send; - -#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) - /* Print TX stats including update packets */ - for (j = 0; j < slave_bufs_pkts[i]; j++) { - eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *); - mode6_debug("TX ARP:", eth_h, i, &burstnumberTX); - } -#endif - } - } - - /* Send update packets on proper slaves */ - for (i = 0; i < RTE_MAX_ETHPORTS; i++) { - if (update_bufs_pkts[i] > 0) { - num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i], - update_bufs_pkts[i]); - for (j = num_send; j < update_bufs_pkts[i]; j++) { - rte_pktmbuf_free(update_bufs[i][j]); - } -#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) - for (j = 0; j < update_bufs_pkts[i]; j++) { - eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *); - mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX); - } -#endif - } - } - - /* Send non-ARP packets using tlb policy */ - if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) { - num_send = bond_ethdev_tx_burst_tlb(queue, - slave_bufs[RTE_MAX_ETHPORTS], - slave_bufs_pkts[RTE_MAX_ETHPORTS]); - - for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) { - bufs[nb_pkts - 1 - num_not_send - j] = - 
slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j]; - } - - num_tx_total += num_send; - num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; - } - - return num_tx_total; -} - -static uint16_t -bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs, - uint16_t nb_pkts) -{ - struct bond_dev_private *internals; - struct bond_tx_queue *bd_tx_q; - - uint8_t num_of_slaves; - uint8_t slaves[RTE_MAX_ETHPORTS]; - - uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0; - - int i, op_slave_id; - - struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts]; - uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; - - bd_tx_q = (struct bond_tx_queue *)queue; - internals = bd_tx_q->dev_private; - - /* Copy slave list to protect against slave up/down changes during tx - * bursting */ - num_of_slaves = internals->active_slave_count; - memcpy(slaves, internals->active_slaves, - sizeof(internals->active_slaves[0]) * num_of_slaves); - - if (num_of_slaves < 1) - return num_tx_total; - - /* Populate slaves mbuf with the packets which are to be sent on it */ - for (i = 0; i < nb_pkts; i++) { - /* Select output slave using hash based on xmit policy */ - op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves); - - /* Populate slave mbuf arrays with mbufs for that slave */ - slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i]; - } - - /* Send packet burst on each slave device */ - for (i = 0; i < num_of_slaves; i++) { - if (slave_nb_pkts[i] > 0) { - num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, - slave_bufs[i], slave_nb_pkts[i]); - - /* if tx burst fails move packets to end of bufs */ - if (unlikely(num_tx_slave < slave_nb_pkts[i])) { - int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave; - - tx_fail_total += slave_tx_fail_count; - memcpy(&bufs[nb_pkts - tx_fail_total], - &slave_bufs[i][num_tx_slave], - slave_tx_fail_count * sizeof(bufs[0])); - } - - num_tx_total += num_tx_slave; - } - } - - return num_tx_total; -} - -static uint16_t -bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, - uint16_t nb_pkts) -{ - struct bond_dev_private *internals; - struct bond_tx_queue *bd_tx_q; - - uint8_t num_of_slaves; - uint8_t slaves[RTE_MAX_ETHPORTS]; - /* positions in slaves, not ID */ - uint8_t distributing_offsets[RTE_MAX_ETHPORTS]; - uint8_t distributing_count; - - uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0; - uint16_t i, j, op_slave_idx; - const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1; - - /* Allocate additional packets in case 8023AD mode. 
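 * Each slave can have up to BOND_MODE_8023AX_SLAVE_TX_PKTS LACPDU/slow
 * protocol packets queued on its tx_ring by the mode 4 state machine;
 * those are dequeued below and placed ahead of the data packets in
 * slave_bufs[], which is why buffs_size adds
 * BOND_MODE_8023AX_SLAVE_TX_PKTS + 1 on top of nb_pkts.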
*/ - struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size]; - void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL }; - - /* Total amount of packets in slave_bufs */ - uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; - /* Slow packets placed in each slave */ - uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; - - bd_tx_q = (struct bond_tx_queue *)queue; - internals = bd_tx_q->dev_private; - - /* Copy slave list to protect against slave up/down changes during tx - * bursting */ - num_of_slaves = internals->active_slave_count; - if (num_of_slaves < 1) - return num_tx_total; - - memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves); - - distributing_count = 0; - for (i = 0; i < num_of_slaves; i++) { - struct port *port = &mode_8023ad_ports[slaves[i]]; - - slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring, - slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS); - slave_nb_pkts[i] = slave_slow_nb_pkts[i]; - - for (j = 0; j < slave_slow_nb_pkts[i]; j++) - slave_bufs[i][j] = slow_pkts[j]; - - if (ACTOR_STATE(port, DISTRIBUTING)) - distributing_offsets[distributing_count++] = i; - } - - if (likely(distributing_count > 0)) { - /* Populate slaves mbuf with the packets which are to be sent on it */ - for (i = 0; i < nb_pkts; i++) { - /* Select output slave using hash based on xmit policy */ - op_slave_idx = internals->xmit_hash(bufs[i], distributing_count); - - /* Populate slave mbuf arrays with mbufs for that slave. Use only - * slaves that are currently distributing. */ - uint8_t slave_offset = distributing_offsets[op_slave_idx]; - slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i]; - slave_nb_pkts[slave_offset]++; - } - } - - /* Send packet burst on each slave device */ - for (i = 0; i < num_of_slaves; i++) { - if (slave_nb_pkts[i] == 0) - continue; - - num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, - slave_bufs[i], slave_nb_pkts[i]); - - /* If tx burst fails drop slow packets */ - for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++) - rte_pktmbuf_free(slave_bufs[i][num_tx_slave]); - - num_tx_total += num_tx_slave - slave_slow_nb_pkts[i]; - num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave; - - /* If tx burst fails move packets to end of bufs */ - if (unlikely(num_tx_slave < slave_nb_pkts[i])) { - uint16_t j = nb_pkts - num_tx_fail_total; - for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++) - bufs[j] = slave_bufs[i][num_tx_slave]; - } - } - - return num_tx_total; -} - -static uint16_t -bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs, - uint16_t nb_pkts) -{ - struct bond_dev_private *internals; - struct bond_tx_queue *bd_tx_q; - - uint8_t tx_failed_flag = 0, num_of_slaves; - uint8_t slaves[RTE_MAX_ETHPORTS]; - - uint16_t max_nb_of_tx_pkts = 0; - - int slave_tx_total[RTE_MAX_ETHPORTS]; - int i, most_successful_tx_slave = -1; - - bd_tx_q = (struct bond_tx_queue *)queue; - internals = bd_tx_q->dev_private; - - /* Copy slave list to protect against slave up/down changes during tx - * bursting */ - num_of_slaves = internals->active_slave_count; - memcpy(slaves, internals->active_slaves, - sizeof(internals->active_slaves[0]) * num_of_slaves); - - if (num_of_slaves < 1) - return 0; - - /* Increment reference count on mbufs */ - for (i = 0; i < nb_pkts; i++) - rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1); - - /* Transmit burst on each active slave */ - for (i = 0; i < num_of_slaves; i++) { - slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, - bufs, nb_pkts); - - if 
(unlikely(slave_tx_total[i] < nb_pkts)) - tx_failed_flag = 1; - - /* record the value and slave index for the slave which transmits the - * maximum number of packets */ - if (slave_tx_total[i] > max_nb_of_tx_pkts) { - max_nb_of_tx_pkts = slave_tx_total[i]; - most_successful_tx_slave = i; - } - } - - /* if slaves fail to transmit packets from burst, the calling application - * is not expected to know about multiple references to packets so we must - * handle failures of all packets except those of the most successful slave - */ - if (unlikely(tx_failed_flag)) - for (i = 0; i < num_of_slaves; i++) - if (i != most_successful_tx_slave) - while (slave_tx_total[i] < nb_pkts) - rte_pktmbuf_free(bufs[slave_tx_total[i]++]); - - return max_nb_of_tx_pkts; -} - -void -link_properties_set(struct rte_eth_dev *bonded_eth_dev, - struct rte_eth_link *slave_dev_link) -{ - struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link; - struct bond_dev_private *internals = bonded_eth_dev->data->dev_private; - - if (slave_dev_link->link_status && - bonded_eth_dev->data->dev_started) { - bonded_dev_link->link_duplex = slave_dev_link->link_duplex; - bonded_dev_link->link_speed = slave_dev_link->link_speed; - - internals->link_props_set = 1; - } -} - -void -link_properties_reset(struct rte_eth_dev *bonded_eth_dev) -{ - struct bond_dev_private *internals = bonded_eth_dev->data->dev_private; - - memset(&(bonded_eth_dev->data->dev_link), 0, - sizeof(bonded_eth_dev->data->dev_link)); - - internals->link_props_set = 0; -} - -int -link_properties_valid(struct rte_eth_link *bonded_dev_link, - struct rte_eth_link *slave_dev_link) -{ - if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex || - bonded_dev_link->link_speed != slave_dev_link->link_speed) - return -1; - - return 0; -} - -int -mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr) -{ - struct ether_addr *mac_addr; - - if (eth_dev == NULL) { - RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__); - return -1; - } - - if (dst_mac_addr == NULL) { - RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__); - return -1; - } - - mac_addr = eth_dev->data->mac_addrs; - - ether_addr_copy(mac_addr, dst_mac_addr); - return 0; -} - -int -mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr) -{ - struct ether_addr *mac_addr; - - if (eth_dev == NULL) { - RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified"); - return -1; - } - - if (new_mac_addr == NULL) { - RTE_BOND_LOG(ERR, "NULL pointer MAC specified"); - return -1; - } - - mac_addr = eth_dev->data->mac_addrs; - - /* If new MAC is different to current MAC then update */ - if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0) - memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr)); - - return 0; -} - -int -mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev) -{ - struct bond_dev_private *internals = bonded_eth_dev->data->dev_private; - int i; - - /* Update slave devices MAC addresses */ - if (internals->slave_count < 1) - return -1; - - switch (internals->mode) { - case BONDING_MODE_ROUND_ROBIN: - case BONDING_MODE_BALANCE: - case BONDING_MODE_BROADCAST: - for (i = 0; i < internals->slave_count; i++) { - if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id], - bonded_eth_dev->data->mac_addrs)) { - RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", - internals->slaves[i].port_id); - return -1; - } - } - break; - case BONDING_MODE_8023AD: - bond_mode_8023ad_mac_address_update(bonded_eth_dev); - break; - 
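mac_address_slaves_update() propagates MAC addresses per mode: round-robin, balance and broadcast copy the bonded MAC to every slave; mode 4 defers to the 802.3ad handler; active-backup, TLB and ALB (the cases that follow) put the bonded MAC on the primary only and restore each other slave's persisted address. A minimal sketch of the public call that triggers this update, assuming bond_port was returned by rte_eth_bond_create() and using an illustrative locally administered address:

    #include <rte_ether.h>
    #include <rte_eth_bond.h>

    /* Illustrative only: apply a user-defined MAC to the bonded port; the
     * PMD then propagates it to the slaves according to the current mode. */
    static int
    set_bond_mac(uint8_t bond_port)
    {
        struct ether_addr addr = {
            .addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 },
        };

        return rte_eth_bond_mac_address_set(bond_port, &addr);
    }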
case BONDING_MODE_ACTIVE_BACKUP: - case BONDING_MODE_TLB: - case BONDING_MODE_ALB: - default: - for (i = 0; i < internals->slave_count; i++) { - if (internals->slaves[i].port_id == - internals->current_primary_port) { - if (mac_address_set(&rte_eth_devices[internals->primary_port], - bonded_eth_dev->data->mac_addrs)) { - RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", - internals->current_primary_port); - return -1; - } - } else { - if (mac_address_set( - &rte_eth_devices[internals->slaves[i].port_id], - &internals->slaves[i].persisted_mac_addr)) { - RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", - internals->slaves[i].port_id); - return -1; - } - } - } - } - - return 0; -} - -int -bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode) -{ - struct bond_dev_private *internals; - - internals = eth_dev->data->dev_private; - - switch (mode) { - case BONDING_MODE_ROUND_ROBIN: - eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin; - eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; - break; - case BONDING_MODE_ACTIVE_BACKUP: - eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup; - eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup; - break; - case BONDING_MODE_BALANCE: - eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance; - eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; - break; - case BONDING_MODE_BROADCAST: - eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast; - eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; - break; - case BONDING_MODE_8023AD: - if (bond_mode_8023ad_enable(eth_dev) != 0) - return -1; - - eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad; - eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad; - RTE_LOG(WARNING, PMD, - "Using mode 4, it is necessary to do TX burst and RX burst " - "at least every 100ms.\n"); - break; - case BONDING_MODE_TLB: - eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb; - eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup; - break; - case BONDING_MODE_ALB: - if (bond_mode_alb_enable(eth_dev) != 0) - return -1; - - eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb; - eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb; - break; - default: - return -1; - } - - internals->mode = mode; - - return 0; -} - -int -slave_configure(struct rte_eth_dev *bonded_eth_dev, - struct rte_eth_dev *slave_eth_dev) -{ - struct bond_rx_queue *bd_rx_q; - struct bond_tx_queue *bd_tx_q; - - int errval; - uint16_t q_id; - - /* Stop slave */ - rte_eth_dev_stop(slave_eth_dev->data->port_id); - - /* Enable interrupts on slave device if supported */ - if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC) - slave_eth_dev->data->dev_conf.intr_conf.lsc = 1; - - /* Configure device */ - errval = rte_eth_dev_configure(slave_eth_dev->data->port_id, - bonded_eth_dev->data->nb_rx_queues, - bonded_eth_dev->data->nb_tx_queues, - &(slave_eth_dev->data->dev_conf)); - if (errval != 0) { - RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)", - slave_eth_dev->data->port_id, errval); - return errval; - } - - /* Setup Rx Queues */ - for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) { - bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id]; - - errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id, - bd_rx_q->nb_rx_desc, - rte_eth_dev_socket_id(slave_eth_dev->data->port_id), - &(bd_rx_q->rx_conf), bd_rx_q->mb_pool); - if (errval != 0) { - RTE_BOND_LOG(ERR, - "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)", - slave_eth_dev->data->port_id, q_id, errval); - 
return errval; - } - } - - /* Setup Tx Queues */ - for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) { - bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id]; - - errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id, - bd_tx_q->nb_tx_desc, - rte_eth_dev_socket_id(slave_eth_dev->data->port_id), - &bd_tx_q->tx_conf); - if (errval != 0) { - RTE_BOND_LOG(ERR, - "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)", - slave_eth_dev->data->port_id, q_id, errval); - return errval; - } - } - - /* Start device */ - errval = rte_eth_dev_start(slave_eth_dev->data->port_id); - if (errval != 0) { - RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)", - slave_eth_dev->data->port_id, errval); - return -1; - } - - return 0; -} - -void -slave_remove(struct bond_dev_private *internals, - struct rte_eth_dev *slave_eth_dev) -{ - uint8_t i; - - for (i = 0; i < internals->slave_count; i++) - if (internals->slaves[i].port_id == - slave_eth_dev->data->port_id) - break; - - if (i < (internals->slave_count - 1)) - memmove(&internals->slaves[i], &internals->slaves[i + 1], - sizeof(internals->slaves[0]) * - (internals->slave_count - i - 1)); - - internals->slave_count--; -} - -static void -bond_ethdev_slave_link_status_change_monitor(void *cb_arg); - -void -slave_add(struct bond_dev_private *internals, - struct rte_eth_dev *slave_eth_dev) -{ - struct bond_slave_details *slave_details = - &internals->slaves[internals->slave_count]; - - slave_details->port_id = slave_eth_dev->data->port_id; - slave_details->last_link_status = 0; - - /* If slave device doesn't support interrupts then we need to enabled - * polling to monitor link status */ - if (!(slave_eth_dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) { - slave_details->link_status_poll_enabled = 1; - - if (!internals->link_status_polling_enabled) { - internals->link_status_polling_enabled = 1; - - rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000, - bond_ethdev_slave_link_status_change_monitor, - (void *)&rte_eth_devices[internals->port_id]); - } - } - - slave_details->link_status_wait_to_complete = 0; - /* clean tlb_last_obytes when adding port for bonding device */ - memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs, - sizeof(struct ether_addr)); -} - -void -bond_ethdev_primary_set(struct bond_dev_private *internals, - uint8_t slave_port_id) -{ - int i; - - if (internals->active_slave_count < 1) - internals->current_primary_port = slave_port_id; - else - /* Search bonded device slave ports for new proposed primary port */ - for (i = 0; i < internals->active_slave_count; i++) { - if (internals->active_slaves[i] == slave_port_id) - internals->current_primary_port = slave_port_id; - } -} - -static void -bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev); - -static int -bond_ethdev_start(struct rte_eth_dev *eth_dev) -{ - struct bond_dev_private *internals; - int i; - - /* slave eth dev will be started by bonded device */ - if (valid_bonded_ethdev(eth_dev)) { - RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)", - eth_dev->data->port_id); - return -1; - } - - eth_dev->data->dev_link.link_status = 0; - eth_dev->data->dev_started = 1; - - internals = eth_dev->data->dev_private; - - if (internals->slave_count == 0) { - RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices"); - return -1; - } - - if (internals->user_defined_mac == 0) { - struct ether_addr *new_mac_addr = NULL; - - for (i = 0; i < internals->slave_count; 
i++) - if (internals->slaves[i].port_id == internals->primary_port) - new_mac_addr = &internals->slaves[i].persisted_mac_addr; - - if (new_mac_addr == NULL) - return -1; - - if (mac_address_set(eth_dev, new_mac_addr) != 0) { - RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address", - eth_dev->data->port_id); - return -1; - } - } - - /* Update all slave devices MACs*/ - if (mac_address_slaves_update(eth_dev) != 0) - return -1; - - /* If bonded device is configure in promiscuous mode then re-apply config */ - if (internals->promiscuous_en) - bond_ethdev_promiscuous_enable(eth_dev); - - /* Reconfigure each slave device if starting bonded device */ - for (i = 0; i < internals->slave_count; i++) { - if (slave_configure(eth_dev, - &(rte_eth_devices[internals->slaves[i].port_id])) != 0) { - RTE_BOND_LOG(ERR, - "bonded port (%d) failed to reconfigure slave device (%d)", - eth_dev->data->port_id, internals->slaves[i].port_id); - return -1; - } - } - - if (internals->user_defined_primary_port) - bond_ethdev_primary_set(internals, internals->primary_port); - - if (internals->mode == BONDING_MODE_8023AD) - bond_mode_8023ad_start(eth_dev); - - if (internals->mode == BONDING_MODE_TLB || - internals->mode == BONDING_MODE_ALB) - bond_tlb_enable(internals); - - return 0; -} - -static void -bond_ethdev_stop(struct rte_eth_dev *eth_dev) -{ - struct bond_dev_private *internals = eth_dev->data->dev_private; - uint8_t i; - - if (internals->mode == BONDING_MODE_8023AD) { - struct port *port; - void *pkt = NULL; - - bond_mode_8023ad_stop(eth_dev); - - /* Discard all messages to/from mode 4 state machines */ - for (i = 0; i < internals->slave_count; i++) { - port = &mode_8023ad_ports[internals->slaves[i].port_id]; - - RTE_VERIFY(port->rx_ring != NULL); - while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT) - rte_pktmbuf_free(pkt); - - RTE_VERIFY(port->tx_ring != NULL); - while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT) - rte_pktmbuf_free(pkt); - } - } - - if (internals->mode == BONDING_MODE_TLB || - internals->mode == BONDING_MODE_ALB) { - bond_tlb_disable(internals); - for (i = 0; i < internals->active_slave_count; i++) - tlb_last_obytets[internals->active_slaves[i]] = 0; - } - - internals->active_slave_count = 0; - internals->link_status_polling_enabled = 0; - - eth_dev->data->dev_link.link_status = 0; - eth_dev->data->dev_started = 0; -} - -static void -bond_ethdev_close(struct rte_eth_dev *dev __rte_unused) -{ -} - -/* forward declaration */ -static int bond_ethdev_configure(struct rte_eth_dev *dev); - -static void -bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) -{ - struct bond_dev_private *internals = dev->data->dev_private; - - dev_info->driver_name = driver_name; - dev_info->max_mac_addrs = 1; - - dev_info->max_rx_pktlen = (uint32_t)2048; - - dev_info->max_rx_queues = (uint16_t)128; - dev_info->max_tx_queues = (uint16_t)512; - - dev_info->min_rx_bufsize = 0; - dev_info->pci_dev = dev->pci_dev; - - dev_info->rx_offload_capa = internals->rx_offload_capa; - dev_info->tx_offload_capa = internals->tx_offload_capa; -} - -static int -bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, - uint16_t nb_rx_desc, unsigned int socket_id __rte_unused, - const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool) -{ - struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *) - rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue), - 0, dev->pci_dev->numa_node); - if (bd_rx_q == NULL) - return -1; - - bd_rx_q->queue_id = rx_queue_id; - 
bd_rx_q->dev_private = dev->data->dev_private; - - bd_rx_q->nb_rx_desc = nb_rx_desc; - - memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf)); - bd_rx_q->mb_pool = mb_pool; - - dev->data->rx_queues[rx_queue_id] = bd_rx_q; - - return 0; -} - -static int -bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, - uint16_t nb_tx_desc, unsigned int socket_id __rte_unused, - const struct rte_eth_txconf *tx_conf) -{ - struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *) - rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue), - 0, dev->pci_dev->numa_node); - - if (bd_tx_q == NULL) - return -1; - - bd_tx_q->queue_id = tx_queue_id; - bd_tx_q->dev_private = dev->data->dev_private; - - bd_tx_q->nb_tx_desc = nb_tx_desc; - memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf)); - - dev->data->tx_queues[tx_queue_id] = bd_tx_q; - - return 0; -} - -static void -bond_ethdev_rx_queue_release(void *queue) -{ - if (queue == NULL) - return; - - rte_free(queue); -} - -static void -bond_ethdev_tx_queue_release(void *queue) -{ - if (queue == NULL) - return; - - rte_free(queue); -} - -static void -bond_ethdev_slave_link_status_change_monitor(void *cb_arg) -{ - struct rte_eth_dev *bonded_ethdev, *slave_ethdev; - struct bond_dev_private *internals; - - /* Default value for polling slave found is true as we don't want to - * disable the polling thread if we cannot get the lock */ - int i, polling_slave_found = 1; - - if (cb_arg == NULL) - return; - - bonded_ethdev = (struct rte_eth_dev *)cb_arg; - internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private; - - if (!bonded_ethdev->data->dev_started || - !internals->link_status_polling_enabled) - return; - - /* If device is currently being configured then don't check slaves link - * status, wait until next period */ - if (rte_spinlock_trylock(&internals->lock)) { - if (internals->slave_count > 0) - polling_slave_found = 0; - - for (i = 0; i < internals->slave_count; i++) { - if (!internals->slaves[i].link_status_poll_enabled) - continue; - - slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id]; - polling_slave_found = 1; - - /* Update slave link status */ - (*slave_ethdev->dev_ops->link_update)(slave_ethdev, - internals->slaves[i].link_status_wait_to_complete); - - /* if link status has changed since last checked then call lsc - * event callback */ - if (slave_ethdev->data->dev_link.link_status != - internals->slaves[i].last_link_status) { - internals->slaves[i].last_link_status = - slave_ethdev->data->dev_link.link_status; - - bond_ethdev_lsc_event_callback(internals->slaves[i].port_id, - RTE_ETH_EVENT_INTR_LSC, - &bonded_ethdev->data->port_id); - } - } - rte_spinlock_unlock(&internals->lock); - } - - if (polling_slave_found) - /* Set alarm to continue monitoring link status of slave ethdev's */ - rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000, - bond_ethdev_slave_link_status_change_monitor, cb_arg); -} - -static int -bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev, - int wait_to_complete) -{ - struct bond_dev_private *internals = bonded_eth_dev->data->dev_private; - - if (!bonded_eth_dev->data->dev_started || - internals->active_slave_count == 0) { - bonded_eth_dev->data->dev_link.link_status = 0; - return 0; - } else { - struct rte_eth_dev *slave_eth_dev; - int i, link_up = 0; - - for (i = 0; i < internals->active_slave_count; i++) { - slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]]; - - (*slave_eth_dev->dev_ops->link_update)(slave_eth_dev, - 
wait_to_complete); - if (slave_eth_dev->data->dev_link.link_status == 1) { - link_up = 1; - break; - } - } - - bonded_eth_dev->data->dev_link.link_status = link_up; - } - - return 0; -} - -static void -bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) -{ - struct bond_dev_private *internals = dev->data->dev_private; - struct rte_eth_stats slave_stats; - - int i; - - /* clear bonded stats before populating from slaves */ - memset(stats, 0, sizeof(*stats)); - - for (i = 0; i < internals->slave_count; i++) { - rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats); - - stats->ipackets += slave_stats.ipackets; - stats->opackets += slave_stats.opackets; - stats->ibytes += slave_stats.ibytes; - stats->obytes += slave_stats.obytes; - stats->ierrors += slave_stats.ierrors; - stats->oerrors += slave_stats.oerrors; - stats->imcasts += slave_stats.imcasts; - stats->rx_nombuf += slave_stats.rx_nombuf; - stats->fdirmatch += slave_stats.fdirmatch; - stats->fdirmiss += slave_stats.fdirmiss; - stats->tx_pause_xon += slave_stats.tx_pause_xon; - stats->rx_pause_xon += slave_stats.rx_pause_xon; - stats->tx_pause_xoff += slave_stats.tx_pause_xoff; - stats->rx_pause_xoff += slave_stats.rx_pause_xoff; - } -} - -static void -bond_ethdev_stats_reset(struct rte_eth_dev *dev) -{ - struct bond_dev_private *internals = dev->data->dev_private; - int i; - - for (i = 0; i < internals->slave_count; i++) - rte_eth_stats_reset(internals->slaves[i].port_id); -} - -static void -bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev) -{ - struct bond_dev_private *internals = eth_dev->data->dev_private; - int i; - - internals->promiscuous_en = 1; - - switch (internals->mode) { - /* Promiscuous mode is propagated to all slaves */ - case BONDING_MODE_ROUND_ROBIN: - case BONDING_MODE_BALANCE: - case BONDING_MODE_BROADCAST: - for (i = 0; i < internals->slave_count; i++) - rte_eth_promiscuous_enable(internals->slaves[i].port_id); - break; - /* In mode4 promiscus mode is managed when slave is added/removed */ - case BONDING_MODE_8023AD: - break; - /* Promiscuous mode is propagated only to primary slave */ - case BONDING_MODE_ACTIVE_BACKUP: - case BONDING_MODE_TLB: - case BONDING_MODE_ALB: - default: - rte_eth_promiscuous_enable(internals->current_primary_port); - } -} - -static void -bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev) -{ - struct bond_dev_private *internals = dev->data->dev_private; - int i; - - internals->promiscuous_en = 0; - - switch (internals->mode) { - /* Promiscuous mode is propagated to all slaves */ - case BONDING_MODE_ROUND_ROBIN: - case BONDING_MODE_BALANCE: - case BONDING_MODE_BROADCAST: - for (i = 0; i < internals->slave_count; i++) - rte_eth_promiscuous_disable(internals->slaves[i].port_id); - break; - /* In mode4 promiscus mode is set managed when slave is added/removed */ - case BONDING_MODE_8023AD: - break; - /* Promiscuous mode is propagated only to primary slave */ - case BONDING_MODE_ACTIVE_BACKUP: - case BONDING_MODE_TLB: - case BONDING_MODE_ALB: - default: - rte_eth_promiscuous_disable(internals->current_primary_port); - } -} - -static void -bond_ethdev_delayed_lsc_propagation(void *arg) -{ - if (arg == NULL) - return; - - _rte_eth_dev_callback_process((struct rte_eth_dev *)arg, - RTE_ETH_EVENT_INTR_LSC); -} - -void -bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, - void *param) -{ - struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev; - struct bond_dev_private *internals; - struct rte_eth_link link; - - int i, valid_slave = 
0; - uint8_t active_pos; - uint8_t lsc_flag = 0; - - if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL) - return; - - bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param]; - slave_eth_dev = &rte_eth_devices[port_id]; - - if (valid_bonded_ethdev(bonded_eth_dev)) - return; - - internals = bonded_eth_dev->data->dev_private; - - /* If the device isn't started don't handle interrupts */ - if (!bonded_eth_dev->data->dev_started) - return; - - /* verify that port_id is a valid slave of bonded port */ - for (i = 0; i < internals->slave_count; i++) { - if (internals->slaves[i].port_id == port_id) { - valid_slave = 1; - break; - } - } - - if (!valid_slave) - return; - - /* Search for port in active port list */ - active_pos = find_slave_by_id(internals->active_slaves, - internals->active_slave_count, port_id); - - rte_eth_link_get_nowait(port_id, &link); - if (link.link_status) { - if (active_pos < internals->active_slave_count) - return; - - /* if no active slave ports then set this port to be primary port */ - if (internals->active_slave_count < 1) { - /* If first active slave, then change link status */ - bonded_eth_dev->data->dev_link.link_status = 1; - internals->current_primary_port = port_id; - lsc_flag = 1; - - mac_address_slaves_update(bonded_eth_dev); - - /* Inherit eth dev link properties from first active slave */ - link_properties_set(bonded_eth_dev, - &(slave_eth_dev->data->dev_link)); - } - - activate_slave(bonded_eth_dev, port_id); - - /* If user has defined the primary port then default to using it */ - if (internals->user_defined_primary_port && - internals->primary_port == port_id) - bond_ethdev_primary_set(internals, port_id); - } else { - if (active_pos == internals->active_slave_count) - return; - - /* Remove from active slave list */ - deactivate_slave(bonded_eth_dev, port_id); - - /* No active slaves, change link status to down and reset other - * link properties */ - if (internals->active_slave_count < 1) { - lsc_flag = 1; - bonded_eth_dev->data->dev_link.link_status = 0; - - link_properties_reset(bonded_eth_dev); - } - - /* Update primary id, take first active slave from list or if none - * available set to -1 */ - if (port_id == internals->current_primary_port) { - if (internals->active_slave_count > 0) - bond_ethdev_primary_set(internals, - internals->active_slaves[0]); - else - internals->current_primary_port = internals->primary_port; - } - } - - if (lsc_flag) { - /* Cancel any possible outstanding interrupts if delays are enabled */ - if (internals->link_up_delay_ms > 0 || - internals->link_down_delay_ms > 0) - rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation, - bonded_eth_dev); - - if (bonded_eth_dev->data->dev_link.link_status) { - if (internals->link_up_delay_ms > 0) - rte_eal_alarm_set(internals->link_up_delay_ms * 1000, - bond_ethdev_delayed_lsc_propagation, - (void *)bonded_eth_dev); - else - _rte_eth_dev_callback_process(bonded_eth_dev, - RTE_ETH_EVENT_INTR_LSC); - - } else { - if (internals->link_down_delay_ms > 0) - rte_eal_alarm_set(internals->link_down_delay_ms * 1000, - bond_ethdev_delayed_lsc_propagation, - (void *)bonded_eth_dev); - else - _rte_eth_dev_callback_process(bonded_eth_dev, - RTE_ETH_EVENT_INTR_LSC); - } - } -} - -struct eth_dev_ops default_dev_ops = { - .dev_start = bond_ethdev_start, - .dev_stop = bond_ethdev_stop, - .dev_close = bond_ethdev_close, - .dev_configure = bond_ethdev_configure, - .dev_infos_get = bond_ethdev_info, - .rx_queue_setup = bond_ethdev_rx_queue_setup, - .tx_queue_setup = bond_ethdev_tx_queue_setup, - 
.rx_queue_release = bond_ethdev_rx_queue_release, - .tx_queue_release = bond_ethdev_tx_queue_release, - .link_update = bond_ethdev_link_update, - .stats_get = bond_ethdev_stats_get, - .stats_reset = bond_ethdev_stats_reset, - .promiscuous_enable = bond_ethdev_promiscuous_enable, - .promiscuous_disable = bond_ethdev_promiscuous_disable -}; - -static int -bond_init(const char *name, const char *params) -{ - struct bond_dev_private *internals; - struct rte_kvargs *kvlist; - uint8_t bonding_mode, socket_id; - int arg_count, port_id; - - RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name); - - kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments); - if (kvlist == NULL) - return -1; - - /* Parse link bonding mode */ - if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) { - if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG, - &bond_ethdev_parse_slave_mode_kvarg, - &bonding_mode) != 0) { - RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n", - name); - goto parse_error; - } - } else { - RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded " - "device %s\n", name); - goto parse_error; - } - - /* Parse socket id to create bonding device on */ - arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG); - if (arg_count == 1) { - if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG, - &bond_ethdev_parse_socket_id_kvarg, &socket_id) - != 0) { - RTE_LOG(ERR, EAL, "Invalid socket Id specified for " - "bonded device %s\n", name); - goto parse_error; - } - } else if (arg_count > 1) { - RTE_LOG(ERR, EAL, "Socket Id can be specified only once for " - "bonded device %s\n", name); - goto parse_error; - } else { - socket_id = rte_socket_id(); - } - - /* Create link bonding eth device */ - port_id = rte_eth_bond_create(name, bonding_mode, socket_id); - if (port_id < 0) { - RTE_LOG(ERR, EAL, "Failed to create socket %s in mode %u on " - "socket %u.\n", name, bonding_mode, socket_id); - goto parse_error; - } - internals = rte_eth_devices[port_id].data->dev_private; - internals->kvlist = kvlist; - - RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on " - "socket %u.\n", name, port_id, bonding_mode, socket_id); - return 0; - -parse_error: - rte_kvargs_free(kvlist); - - return -1; -} - -/* this part will resolve the slave portids after all the other pdev and vdev - * have been allocated */ -static int -bond_ethdev_configure(struct rte_eth_dev *dev) -{ - char *name = dev->data->name; - struct bond_dev_private *internals = dev->data->dev_private; - struct rte_kvargs *kvlist = internals->kvlist; - int arg_count; - uint8_t port_id = dev - rte_eth_devices; - - /* - * if no kvlist, it means that this bonded device has been created - * through the bonding api. 
- */ - if (!kvlist) - return 0; - - /* Parse MAC address for bonded device */ - arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG); - if (arg_count == 1) { - struct ether_addr bond_mac; - - if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG, - &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) { - RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n", - name); - return -1; - } - - /* Set MAC address */ - if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) { - RTE_LOG(ERR, EAL, - "Failed to set mac address on bonded device %s\n", - name); - return -1; - } - } else if (arg_count > 1) { - RTE_LOG(ERR, EAL, - "MAC address can be specified only once for bonded device %s\n", - name); - return -1; - } - - /* Parse/set balance mode transmit policy */ - arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG); - if (arg_count == 1) { - uint8_t xmit_policy; - - if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG, - &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) != - 0) { - RTE_LOG(INFO, EAL, - "Invalid xmit policy specified for bonded device %s\n", - name); - return -1; - } - - /* Set balance mode transmit policy*/ - if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) { - RTE_LOG(ERR, EAL, - "Failed to set balance xmit policy on bonded device %s\n", - name); - return -1; - } - } else if (arg_count > 1) { - RTE_LOG(ERR, EAL, - "Transmit policy can be specified only once for bonded device" - " %s\n", name); - return -1; - } - - /* Parse/add slave ports to bonded device */ - if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) { - struct bond_ethdev_slave_ports slave_ports; - unsigned i; - - memset(&slave_ports, 0, sizeof(slave_ports)); - - if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG, - &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) { - RTE_LOG(ERR, EAL, - "Failed to parse slave ports for bonded device %s\n", - name); - return -1; - } - - for (i = 0; i < slave_ports.slave_count; i++) { - if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) { - RTE_LOG(ERR, EAL, - "Failed to add port %d as slave to bonded device %s\n", - slave_ports.slaves[i], name); - } - } - - } else { - RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name); - return -1; - } - - /* Parse/set primary slave port id*/ - arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG); - if (arg_count == 1) { - uint8_t primary_slave_port_id; - - if (rte_kvargs_process(kvlist, - PMD_BOND_PRIMARY_SLAVE_KVARG, - &bond_ethdev_parse_primary_slave_port_id_kvarg, - &primary_slave_port_id) < 0) { - RTE_LOG(INFO, EAL, - "Invalid primary slave port id specified for bonded device" - " %s\n", name); - return -1; - } - - /* Set balance mode transmit policy*/ - if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id) - != 0) { - RTE_LOG(ERR, EAL, - "Failed to set primary slave port %d on bonded device %s\n", - primary_slave_port_id, name); - return -1; - } - } else if (arg_count > 1) { - RTE_LOG(INFO, EAL, - "Primary slave can be specified only once for bonded device" - " %s\n", name); - return -1; - } - - /* Parse link status monitor polling interval */ - arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG); - if (arg_count == 1) { - uint32_t lsc_poll_interval_ms; - - if (rte_kvargs_process(kvlist, - PMD_BOND_LSC_POLL_PERIOD_KVARG, - &bond_ethdev_parse_time_ms_kvarg, - &lsc_poll_interval_ms) < 0) { - RTE_LOG(INFO, EAL, - "Invalid lsc polling interval value specified for bonded" - " device 
%s\n", name); - return -1; - } - - if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms) - != 0) { - RTE_LOG(ERR, EAL, - "Failed to set lsc monitor polling interval (%u ms) on" - " bonded device %s\n", lsc_poll_interval_ms, name); - return -1; - } - } else if (arg_count > 1) { - RTE_LOG(INFO, EAL, - "LSC polling interval can be specified only once for bonded" - " device %s\n", name); - return -1; - } - - /* Parse link up interrupt propagation delay */ - arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG); - if (arg_count == 1) { - uint32_t link_up_delay_ms; - - if (rte_kvargs_process(kvlist, - PMD_BOND_LINK_UP_PROP_DELAY_KVARG, - &bond_ethdev_parse_time_ms_kvarg, - &link_up_delay_ms) < 0) { - RTE_LOG(INFO, EAL, - "Invalid link up propagation delay value specified for" - " bonded device %s\n", name); - return -1; - } - - /* Set balance mode transmit policy*/ - if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms) - != 0) { - RTE_LOG(ERR, EAL, - "Failed to set link up propagation delay (%u ms) on bonded" - " device %s\n", link_up_delay_ms, name); - return -1; - } - } else if (arg_count > 1) { - RTE_LOG(INFO, EAL, - "Link up propagation delay can be specified only once for" - " bonded device %s\n", name); - return -1; - } - - /* Parse link down interrupt propagation delay */ - arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG); - if (arg_count == 1) { - uint32_t link_down_delay_ms; - - if (rte_kvargs_process(kvlist, - PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG, - &bond_ethdev_parse_time_ms_kvarg, - &link_down_delay_ms) < 0) { - RTE_LOG(INFO, EAL, - "Invalid link down propagation delay value specified for" - " bonded device %s\n", name); - return -1; - } - - /* Set balance mode transmit policy*/ - if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms) - != 0) { - RTE_LOG(ERR, EAL, - "Failed to set link down propagation delay (%u ms) on" - " bonded device %s\n", link_down_delay_ms, name); - return -1; - } - } else if (arg_count > 1) { - RTE_LOG(INFO, EAL, - "Link down propagation delay can be specified only once for" - " bonded device %s\n", name); - return -1; - } - - return 0; -} - -static struct rte_driver bond_drv = { - .name = "eth_bond", - .type = PMD_VDEV, - .init = bond_init, -}; - -PMD_REGISTER_DRIVER(bond_drv); diff --git a/lib/librte_pmd_bond/rte_eth_bond_private.h b/lib/librte_pmd_bond/rte_eth_bond_private.h deleted file mode 100644 index 45e5c65..0000000 --- a/lib/librte_pmd_bond/rte_eth_bond_private.h +++ /dev/null @@ -1,287 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _RTE_ETH_BOND_PRIVATE_H_ -#define _RTE_ETH_BOND_PRIVATE_H_ - -#include -#include - -#include "rte_eth_bond.h" -#include "rte_eth_bond_8023ad_private.h" -#include "rte_eth_bond_alb.h" - -#define PMD_BOND_SLAVE_PORT_KVARG ("slave") -#define PMD_BOND_PRIMARY_SLAVE_KVARG ("primary") -#define PMD_BOND_MODE_KVARG ("mode") -#define PMD_BOND_XMIT_POLICY_KVARG ("xmit_policy") -#define PMD_BOND_SOCKET_ID_KVARG ("socket_id") -#define PMD_BOND_MAC_ADDR_KVARG ("mac") -#define PMD_BOND_LSC_POLL_PERIOD_KVARG ("lsc_poll_period_ms") -#define PMD_BOND_LINK_UP_PROP_DELAY_KVARG ("up_delay") -#define PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG ("down_delay") - -#define PMD_BOND_XMIT_POLICY_LAYER2_KVARG ("l2") -#define PMD_BOND_XMIT_POLICY_LAYER23_KVARG ("l23") -#define PMD_BOND_XMIT_POLICY_LAYER34_KVARG ("l34") - -#define RTE_BOND_LOG(lvl, msg, ...) \ - RTE_LOG(lvl, PMD, "%s(%d) - " msg "\n", __func__, __LINE__, ##__VA_ARGS__) - -#define BONDING_MODE_INVALID 0xFF - -extern const char *pmd_bond_init_valid_arguments[]; - -extern const char *driver_name; - -/** Port Queue Mapping Structure */ -struct bond_rx_queue { - uint16_t queue_id; - /**< Queue Id */ - struct bond_dev_private *dev_private; - /**< Reference to eth_dev private structure */ - uint16_t nb_rx_desc; - /**< Number of RX descriptors available for the queue */ - struct rte_eth_rxconf rx_conf; - /**< Copy of RX configuration structure for queue */ - struct rte_mempool *mb_pool; - /**< Reference to mbuf pool to use for RX queue */ -}; - -struct bond_tx_queue { - uint16_t queue_id; - /**< Queue Id */ - struct bond_dev_private *dev_private; - /**< Reference to dev private structure */ - uint16_t nb_tx_desc; - /**< Number of TX descriptors available for the queue */ - struct rte_eth_txconf tx_conf; - /**< Copy of TX configuration structure for queue */ -}; - -/** Bonded slave devices structure */ -struct bond_ethdev_slave_ports { - uint8_t slaves[RTE_MAX_ETHPORTS]; /**< Slave port id array */ - uint8_t slave_count; /**< Number of slaves */ -}; - -struct bond_slave_details { - uint8_t port_id; - - uint8_t link_status_poll_enabled; - uint8_t link_status_wait_to_complete; - uint8_t last_link_status; - /**< Port Id of slave eth_dev */ - struct ether_addr persisted_mac_addr; -}; - - -typedef uint16_t (*xmit_hash_t)(const struct rte_mbuf *buf, uint8_t slave_count); - -/** Link Bonding PMD device private configuration Structure */ -struct bond_dev_private { - uint8_t port_id; /**< Port Id of Bonded Port */ - uint8_t mode; /**< Link Bonding Mode */ - - rte_spinlock_t lock; - - uint8_t primary_port; /**< Primary Slave Port */ - uint8_t current_primary_port; /**< Primary Slave Port */ - uint8_t user_defined_primary_port; - /**< Flag for whether primary port is user 
defined or not */ - - uint8_t balance_xmit_policy; - /**< Transmit policy - l2 / l23 / l34 for operation in balance mode */ - xmit_hash_t xmit_hash; - /**< Transmit policy hash function */ - - uint8_t user_defined_mac; - /**< Flag for whether MAC address is user defined or not */ - uint8_t promiscuous_en; - /**< Enabled/disable promiscuous mode on bonding device */ - uint8_t link_props_set; - /**< flag to denote if the link properties are set */ - - uint8_t link_status_polling_enabled; - uint32_t link_status_polling_interval_ms; - - uint32_t link_down_delay_ms; - uint32_t link_up_delay_ms; - - uint16_t nb_rx_queues; /**< Total number of rx queues */ - uint16_t nb_tx_queues; /**< Total number of tx queues*/ - - uint8_t active_slave_count; /**< Number of active slaves */ - uint8_t active_slaves[RTE_MAX_ETHPORTS]; /**< Active slave list */ - - uint8_t slave_count; /**< Number of bonded slaves */ - struct bond_slave_details slaves[RTE_MAX_ETHPORTS]; - /**< Arary of bonded slaves details */ - - struct mode8023ad_private mode4; - uint8_t tlb_slaves_order[RTE_MAX_ETHPORTS]; /* TLB active slaves send order */ - struct mode_alb_private mode6; - - uint32_t rx_offload_capa; /** Rx offload capability */ - uint32_t tx_offload_capa; /** Tx offload capability */ - - struct rte_kvargs *kvlist; - uint8_t slave_update_idx; -}; - -extern struct eth_dev_ops default_dev_ops; - -int -valid_bonded_ethdev(struct rte_eth_dev *eth_dev); - -/* Search given slave array to find possition of given id. - * Return slave pos or slaves_count if not found. */ -static inline uint8_t -find_slave_by_id(uint8_t *slaves, uint8_t slaves_count, uint8_t slave_id) { - - uint8_t pos; - for (pos = 0; pos < slaves_count; pos++) { - if (slave_id == slaves[pos]) - break; - } - - return pos; -} - -int -valid_port_id(uint8_t port_id); - -int -valid_bonded_port_id(uint8_t port_id); - -int -valid_slave_port_id(uint8_t port_id); - -void -deactivate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id); - -void -activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id); - -void -link_properties_set(struct rte_eth_dev *bonded_eth_dev, - struct rte_eth_link *slave_dev_link); -void -link_properties_reset(struct rte_eth_dev *bonded_eth_dev); - -int -link_properties_valid(struct rte_eth_link *bonded_dev_link, - struct rte_eth_link *slave_dev_link); - -int -mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr); - -int -mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr); - -int -mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev); - -uint8_t -number_of_sockets(void); - -int -bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode); - -int -slave_configure(struct rte_eth_dev *bonded_eth_dev, - struct rte_eth_dev *slave_eth_dev); - -void -slave_remove(struct bond_dev_private *internals, - struct rte_eth_dev *slave_eth_dev); - -void -slave_add(struct bond_dev_private *internals, - struct rte_eth_dev *slave_eth_dev); - -uint16_t -xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count); - -uint16_t -xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count); - -uint16_t -xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count); - -void -bond_ethdev_primary_set(struct bond_dev_private *internals, - uint8_t slave_port_id); - -void -bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, - void *param); - -int -bond_ethdev_parse_slave_port_kvarg(const char *key __rte_unused, - const char *value, void *extra_args); - -int 
-bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused, - const char *value, void *extra_args); - -int -bond_ethdev_parse_socket_id_kvarg(const char *key __rte_unused, - const char *value, void *extra_args); - -int -bond_ethdev_parse_primary_slave_port_id_kvarg(const char *key __rte_unused, - const char *value, void *extra_args); - -int -bond_ethdev_parse_balance_xmit_policy_kvarg(const char *key __rte_unused, - const char *value, void *extra_args); - -int -bond_ethdev_parse_bond_mac_addr_kvarg(const char *key __rte_unused, - const char *value, void *extra_args); - -int -bond_ethdev_parse_time_ms_kvarg(const char *key __rte_unused, - const char *value, void *extra_args); - -void -bond_tlb_disable(struct bond_dev_private *internals); - -void -bond_tlb_enable(struct bond_dev_private *internals); - -void -bond_tlb_activate_slave(struct bond_dev_private *internals); - -#endif diff --git a/lib/librte_pmd_bond/rte_eth_bond_version.map b/lib/librte_pmd_bond/rte_eth_bond_version.map deleted file mode 100644 index 135999e..0000000 --- a/lib/librte_pmd_bond/rte_eth_bond_version.map +++ /dev/null @@ -1,22 +0,0 @@ -DPDK_2.0 { - global: - - rte_eth_bond_8023ad_conf_get; - rte_eth_bond_8023ad_setup; - rte_eth_bond_active_slaves_get; - rte_eth_bond_create; - rte_eth_bond_link_monitoring_set; - rte_eth_bond_mac_address_reset; - rte_eth_bond_mac_address_set; - rte_eth_bond_mode_get; - rte_eth_bond_mode_set; - rte_eth_bond_primary_get; - rte_eth_bond_primary_set; - rte_eth_bond_slave_add; - rte_eth_bond_slave_remove; - rte_eth_bond_slaves_get; - rte_eth_bond_xmit_policy_get; - rte_eth_bond_xmit_policy_set; - - local: *; -};
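
As a note on the API that this patch relocates (not part of the diff itself): the symbols listed in rte_eth_bond_version.map are the public entry points an application uses to build a bonded port at runtime. Below is a minimal, illustrative sketch under assumed conditions — the device name "eth_bond0", slave port ids 0 and 1, and the active-backup mode are example values, not anything mandated by the patch.

/* Illustrative sketch only: create a bonded device, attach two slave ports
 * that the EAL has already probed, and choose a primary, using functions
 * exported in rte_eth_bond_version.map. The bonded port must afterwards be
 * configured and started like any other ethdev. */
#include <rte_ethdev.h>
#include <rte_eth_bond.h>
#include <rte_lcore.h>

static int
example_bonded_port_setup(void)
{
	int bond_port;

	/* Create the bonded vdev on the caller's NUMA socket. */
	bond_port = rte_eth_bond_create("eth_bond0", BONDING_MODE_ACTIVE_BACKUP,
			rte_socket_id());
	if (bond_port < 0)
		return -1;

	/* Slave port ids 0 and 1 are assumed to exist for this example. */
	if (rte_eth_bond_slave_add(bond_port, 0) != 0 ||
			rte_eth_bond_slave_add(bond_port, 1) != 0)
		return -1;

	/* Prefer port 0 while its link is up; the PMD's LSC handling switches
	 * to another active slave if the primary's link goes down. */
	return rte_eth_bond_primary_set(bond_port, 0);
}

The same device can also be created without code via the vdev arguments parsed in bond_init()/bond_ethdev_configure() above ("mode", "slave", "primary", "xmit_policy", and so on); the C sketch simply exercises the equivalent exported API.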