[dpdk-dev,2/2] bond: add mode 4 support

Message ID 1410963713-13837-3-git-send-email-pawelx.wodkowski@intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Wodkowski, PawelX Sept. 17, 2014, 2:21 p.m. UTC
Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
Signed-off-by: Maciej T Gajdzica <maciejx.t.gajdzica@intel.com>
Reviewed-by: Declan Doherty <declan.doherty@intel.com>
---
 lib/librte_ether/rte_ether.h               |    1 +
 lib/librte_pmd_bond/Makefile               |    1 +
 lib/librte_pmd_bond/rte_eth_bond.h         |    4 +
 lib/librte_pmd_bond/rte_eth_bond_8023ad.c  | 1064 ++++++++++++++++++++++++++++
 lib/librte_pmd_bond/rte_eth_bond_8023ad.h  |  411 +++++++++++
 lib/librte_pmd_bond/rte_eth_bond_api.c     |   28 +-
 lib/librte_pmd_bond/rte_eth_bond_args.c    |    1 +
 lib/librte_pmd_bond/rte_eth_bond_pmd.c     |  179 ++++-
 lib/librte_pmd_bond/rte_eth_bond_private.h |    9 +-
 9 files changed, 1685 insertions(+), 13 deletions(-)
 create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.c
 create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.h
  

Comments

Neil Horman Sept. 17, 2014, 3:13 p.m. UTC | #1
On Wed, Sep 17, 2014 at 03:21:53PM +0100, Pawel Wodkowski wrote:
> Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
> Signed-off-by: Maciej T Gajdzica <maciejx.t.gajdzica@intel.com>
> Reviewed-by: Declan Doherty <declan.doherty@intel.com>
> ---
>  lib/librte_ether/rte_ether.h               |    1 +
>  lib/librte_pmd_bond/Makefile               |    1 +
>  lib/librte_pmd_bond/rte_eth_bond.h         |    4 +
>  lib/librte_pmd_bond/rte_eth_bond_8023ad.c  | 1064 ++++++++++++++++++++++++++++
>  lib/librte_pmd_bond/rte_eth_bond_8023ad.h  |  411 +++++++++++
>  lib/librte_pmd_bond/rte_eth_bond_api.c     |   28 +-
>  lib/librte_pmd_bond/rte_eth_bond_args.c    |    1 +
>  lib/librte_pmd_bond/rte_eth_bond_pmd.c     |  179 ++++-
>  lib/librte_pmd_bond/rte_eth_bond_private.h |    9 +-
>  9 files changed, 1685 insertions(+), 13 deletions(-)
>  create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.c
>  create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.h
> 
> diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h
> index 2e08f23..1a3711b 100644
> --- a/lib/librte_ether/rte_ether.h
> +++ b/lib/librte_ether/rte_ether.h
> @@ -293,6 +293,7 @@ struct vlan_hdr {
>  #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */
>  #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
>  #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */
> +#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */
>  
>  #ifdef __cplusplus
>  }
> diff --git a/lib/librte_pmd_bond/Makefile b/lib/librte_pmd_bond/Makefile
> index 953d75e..c2312c2 100644
> --- a/lib/librte_pmd_bond/Makefile
> +++ b/lib/librte_pmd_bond/Makefile
> @@ -44,6 +44,7 @@ CFLAGS += $(WERROR_FLAGS)
>  #
>  SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c
>  SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c
>  SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c
>  
>  #
> diff --git a/lib/librte_pmd_bond/rte_eth_bond.h b/lib/librte_pmd_bond/rte_eth_bond.h
> index bd59780..6aac4ec 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond.h
> +++ b/lib/librte_pmd_bond/rte_eth_bond.h
> @@ -75,6 +75,10 @@ extern "C" {
>  /**< Broadcast (Mode 3).
>   * In this mode all transmitted packets will be transmitted on all available
>   * active slaves of the bonded. */
> +#define BONDING_MODE_8023AD				(4)
> +/**< 802.3AD (Mode 4).
> + * In this mode transmission and reception of packets is managed by LACP
> + * protocol specified in 802.3AD documentation. */
>  
>  /* Balance Mode Transmit Policies */
>  #define BALANCE_XMIT_POLICY_LAYER2		(0)
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
> new file mode 100644
> index 0000000..6ce6efb
> --- /dev/null
> +++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
> @@ -0,0 +1,1064 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stddef.h>
> +#include <string.h>
> +
> +#include <rte_alarm.h>
> +#include <rte_malloc.h>
> +#include <rte_errno.h>
> +
> +#include "rte_eth_bond_private.h"
> +#include "rte_eth_bond_8023ad.h"
> +
> +#include <rte_cycles.h>
> +
> +#define RTE_LIBRTE_BOND_DEBUG_8023AX
> +
> +#ifdef RTE_LIBRTE_BOND_DEBUG_8023AX
> +#define BOND_ASSERT(expr) \
> +	((expr) ? (void) (0) \
> +	: rte_panic("%s(%d): assertion failed" __STRING(expr), __FILE__, __LINE__))
> +#else
> +#define BOND_ASSERT(expr) do { } while (0)
> +#endif
It seems like every library re-invents this wheel.  Maybe merge them into a
single assert macro available from a common header?
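For illustration, a minimal sketch of such a shared macro, assuming a hypothetical common header name and build flag (neither exists in the tree at this point; rte_panic() from rte_debug.h is the only real interface used):

/* Hypothetical rte_assert.h -- the header name and RTE_ENABLE_ASSERT flag
 * are assumptions for this sketch, not part of the current tree. */
#ifndef _RTE_ASSERT_H_
#define _RTE_ASSERT_H_

#include <rte_debug.h>	/* rte_panic() */

#ifdef RTE_ENABLE_ASSERT
#define RTE_ASSERT(expr) \
	((expr) ? (void)0 : \
	rte_panic("%s(%d): assertion \"%s\" failed\n", \
		__FILE__, __LINE__, #expr))
#else
#define RTE_ASSERT(expr) do { } while (0)
#endif

#endif /* _RTE_ASSERT_H_ */

Each PMD could then drop its private BOND_ASSERT-style macro and use the shared one.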


> +
> +#ifdef RTE_LIBRTE_BOND_DEBUG_8023AX
> +#define _PORT_ID internals->active_slaves[port_num]
> +#define BOND_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \
> +			bond_dbg_get_time_diff(), _PORT_ID, __FUNCTION__, ##__VA_ARGS__)
> +
> +static unsigned
> +bond_dbg_get_time_diff(void)
> +{
> +	static unsigned ms_start = 0;
> +	struct timespec t;
> +	uint64_t ms;
> +
> +	clock_gettime(CLOCK_MONOTONIC, &t);
> +	ms = (((long)t.tv_sec * 1000000000L) + t.tv_nsec) / 1000000L;
> +
> +	if (ms_start == 0)
> +		ms_start = ms;
> +
> +	return ms - ms_start;
> +}
> +
> +static void
> +bond_print_lacp(struct lacpdu *l)
> +{
> +	char a_state[256] = { 0 };
> +	char p_state[256] = { 0 };
> +	static const char *state_labels[] = {
> +		"ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP"
> +	};
> +
> +	int a_len = 0;
> +	int p_len = 0;
> +	uint8_t i;
> +
> +	for (i = 0; i < 8; i++) {
> +		if ((l->actor.state >> i) & 1) {
> +			a_len += snprintf(a_state + a_len, sizeof(a_state) - a_len, "%s ",
> +				state_labels[i]);
> +		}
> +
> +		if ((l->partner.state >> i) & 1) {
> +			p_len += snprintf(p_state + p_len, sizeof(p_state) - p_len, "%s ",
> +				state_labels[i]);
> +		}
> +	}
> +
> +	if (a_len && a_state[a_len-1] == ' ')
> +		a_state[a_len-1] = '\0';
> +
> +	if (p_len && p_state[p_len-1] == ' ')
> +			p_state[p_len-1] = '\0';
> +
> +	RTE_LOG(DEBUG, PMD, "LACP: {\n"\
> +			"  subtype= %02X\n"\
> +			"  ver_num=%02X\n"\
> +			"  actor={ tlv=%02X, len=%02X\n"\
> +			"    pri=%04X, system=(ADDRESS), key=%04X, p_pri=%04X p_num=%04X\n"\
> +			"       state={ %s }\n"\
> +			"  }\n"\
> +			"  partner={ tlv=%02X, len=%02X\n"\
> +			"    pri=%04X, system=(ADDRESS), key=%04X, p_pri=%04X p_num=%04X\n"\
> +			"       state={ %s }\n"\
> +			"  }\n"\
> +			"  collector={info=%02X, length=%02X, max_delay=%04X\n, " \
> +							"type_term=%02X, terminator_length = %02X}\n",\
> +			l->subtype,\
> +			l->version_number,\
> +			l->actor.tlv_type_info,\
> +			l->actor.info_length,\
> +			l->actor.port_params.system_priority,\
> +			l->actor.port_params.key,\
> +			l->actor.port_params.port_priority,\
> +			l->actor.port_params.port_number,\
> +			a_state,\
> +			l->partner.tlv_type_info,\
> +			l->partner.info_length,\
> +			l->partner.port_params.system_priority,\
> +			l->partner.port_params.key,\
> +			l->partner.port_params.port_priority,\
> +			l->partner.port_params.port_number,\
> +			p_state,\
> +			l->tlv_type_collector_info,\
> +			l->collector_info_length,\
> +			l->collector_max_delay,\
> +			l->tlv_type_terminator,\
> +			l->terminator_length);
> +
> +}
> +#define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu)
> +
> +#else
> +#define BOND_PRINT_LACP(lacpdu) do { } while (0)
> +#define BOND_DEBUG(fmt, ...) do { } while (0)
> +#endif
> +
> +static const struct ether_addr lacp_mac_addr = {
> +	.addr_bytes = {0x01, 0x80, 0xC2, 0x00, 0x00, 0x02}
> +};
> +
> +static void
> +timer_expired_cb(void *arg)
> +{
> +	enum timer_state *timer = arg;
> +
> +	BOND_ASSERT(*timer == TIMER_RUNNING);
> +	*timer = TIMER_EXPIRED;
> +}
> +
> +static void
> +timer_cancel(enum timer_state *timer)
> +{
> +	rte_eal_alarm_cancel(&timer_expired_cb, timer);
> +	*timer = TIMER_NOT_STARTED;
> +}
> +
> +static void
> +timer_set(enum timer_state *timer, uint64_t timeout)
> +{
> +	rte_eal_alarm_cancel(&timer_expired_cb, timer);
> +	rte_eal_alarm_set(timeout * 1000, &timer_expired_cb, timer);
> +	*timer = TIMER_RUNNING;
> +}
> +
> +static bool
> +timer_is_expired(enum timer_state *timer)
> +{
> +	return *timer == TIMER_EXPIRED;
> +}
> +
> +static void
> +record_default(struct port *port)
> +{
> +	/* Record default parametes for partner. Partner admin parameters
> +	 * are not implemented so nothing to copy. Only mark actor that parner is
> +	 * in defaulted state. */
> +	port->partner_state = STATE_LACP_ACTIVE;
> +	ACTOR_STATE_SET(port, DEFAULTED);
> +}
> +
> +/** Function handles rx state machine.
> + *
> + * This function implements Receive State Machine from point 5.4.12 in
> + * 802.1AX documentation. It should be called periodically.
> + *
> + * @param lacpdu		LACPDU received.
> + * @param port			Port on which LACPDU was received.
> + */
> +static void
> +rx_machine(struct bond_dev_private *internals, uint8_t port_num,
> +	struct lacpdu *lacp)
> +{
> +	struct port *port = &internals->mode4.port_list[port_num];
> +
> +	if (SM_FLAG(port, BEGIN)) {
> +		/* Initialize stuff */
> +		BOND_DEBUG("-> INITIALIZE\n");
> +		SM_FLAG_CLR(port, MOVED);
> +		port->selected = UNSELECTED;
> +
> +		record_default(port);
> +
> +		ACTOR_STATE_CLR(port, EXPIRED);
> +		timer_cancel(&port->current_while_timer);
> +
> +		/* DISABLED: On initialization partner is out of sync */
> +		PARTNER_STATE_CLR(port, SYNCHRONIZATION);
> +
> +		/* LACP DISABLED stuff if LACP not enabled on this port */
> +		if (!SM_FLAG(port, LACP_ENABLED))
> +			PARTNER_STATE_CLR(port, AGGREGATION);
> +	}
> +
> +	if (!SM_FLAG(port, LACP_ENABLED)) {
> +		/* Update parameters only if state changed */
> +		if (port->current_while_timer) {
> +			port->selected = UNSELECTED;
> +			record_default(port);
> +			PARTNER_STATE_CLR(port, AGGREGATION);
> +			ACTOR_STATE_CLR(port, EXPIRED);
> +			timer_cancel(&port->current_while_timer);
> +		}
> +		return;
> +	}
> +
> +	if (lacp) {
> +		BOND_DEBUG("LACP -> CURRENT\n");
> +		BOND_PRINT_LACP(lacp);
> +		/* Update selected flag. If partner parameters are defaulted assume they
> +		 * are match. If not defaulted  compare LACP actor with ports parner
> +		 * params. */
> +		if (!(port->actor_state & STATE_DEFAULTED) &&
> +			(((port->partner_state ^ lacp->actor.state) & STATE_AGGREGATION) ||
> +				memcmp(&port->partner, &lacp->actor.port_params,
> +					sizeof(port->partner)) != 0)) {
> +			BOND_DEBUG("selected <- UNSELECTED\n");
> +			port->selected = UNSELECTED;
> +		}
> +
> +		/* Record this PDU actor params as partner params */
> +		memcpy(&port->partner, &lacp->actor.port_params,
> +			sizeof(struct port_params));
> +		port->partner_state = lacp->actor.state;
> +		ACTOR_STATE_CLR(port, DEFAULTED);
> +
> +		/* Update NTT if partners information are outdated */
> +		uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT |
> +			STATE_SYNCHRONIZATION | STATE_AGGREGATION;
> +
> +		if (((port->actor_state ^ lacp->partner.state) & state_mask) ||
> +				memcmp(&port->actor, &lacp->partner.port_params,
> +					sizeof(struct port_params)) != 0) {
> +			port->sm_flags |= SM_FLAGS_NTT;
> +		}
> +
> +		/* If LACP partner params match this port actor params */
> +		if (memcmp(&port->actor, &lacp->partner.port_params,
> +			    sizeof(port->actor)) == 0 &&
> +			(port->partner_state & STATE_AGGREGATION) == (port->actor_state
> +			    & STATE_AGGREGATION))
> +			PARTNER_STATE_SET(port, SYNCHRONIZATION);
> +		else if (!(port->partner_state & STATE_AGGREGATION) &&
> +			    (port->actor_state & STATE_AGGREGATION))
> +			PARTNER_STATE_SET(port, SYNCHRONIZATION);
> +		else
> +			PARTNER_STATE_CLR(port, SYNCHRONIZATION);
> +
> +		if (port->actor_state & STATE_LACP_SHORT_TIMEOUT)
> +			timer_set(&port->current_while_timer, BOND_8023AD_SHORT_TIMEOUT_MS);
> +		else
> +			timer_set(&port->current_while_timer, BOND_8023AD_LONG_TIMEOUT_MS);
> +
> +		ACTOR_STATE_CLR(port, EXPIRED);
> +		return; /* No state change */
> +	}
> +
> +	if (port->current_while_timer != TIMER_RUNNING) {
> +		ACTOR_STATE_SET(port, EXPIRED);
> +		PARTNER_STATE_CLR(port, SYNCHRONIZATION);
> +		PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
> +		timer_set(&port->current_while_timer, BOND_8023AD_SHORT_TIMEOUT_MS);
> +	}
> +}
> +
> +/**
> + * Function handles periodic tx state machine.
> + *
> + * Function implements Periodic Transmission state machine from point 5.4.13
> + * in 802.1AX documentation. It should be called periodically.
> + *
> + * @param port			Port to handle state machine.
> + */
> +static void
> +periodic_machine(struct bond_dev_private *internals, uint8_t port_num)
> +{
> +	struct port *port = &internals->mode4.port_list[port_num];
> +	/* Calculate if either site is LACP enabled */
> +	uint32_t timeout;
> +	uint16_t sm_flags = port->sm_flags;
> +	uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) ||
> +		PARTNER_STATE(port, LACP_ACTIVE);
> +
> +	uint8_t is_partner_fast, was_partner_fast;
> +	/* No periodic is on BEGIN, LACP DISABLE or when both sides are pasive */
> +	if ((sm_flags & SM_FLAGS_BEGIN) || !(sm_flags & SM_FLAGS_LACP_ENABLED) ||
> +		    active == 0) {
> +		timer_cancel(&port->periodic_timer);
> +		port->tx_machine_timer = TIMER_EXPIRED;
> +		sm_flags &= ~SM_FLAGS_PARTNER_SHORT_TIMEOUT;
> +		port->sm_flags = sm_flags;
> +
> +		BOND_DEBUG("-> NO_PERIODIC ( %s%s%s)\n",
> +			SM_FLAG(port, BEGIN) ? "begind " : "",
> +			SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ",
> +			active ? "LACP active " : "LACP pasive ");
> +		return;
> +	}
> +
> +	is_partner_fast = !!(port->partner_state & STATE_LACP_SHORT_TIMEOUT);
> +	was_partner_fast = !!(port->sm_flags & SM_FLAGS_PARTNER_SHORT_TIMEOUT);
> +
> +	/* If periodic timer is not started, transit from NO PERIODIC to FAST/SLOW.
> +	 * Other case: check if timer expire or partners settings changed. */
> +	if (port->periodic_timer != TIMER_NOT_STARTED) {
> +		if (timer_is_expired(&port->periodic_timer))
> +			sm_flags |= SM_FLAGS_NTT;
> +		else if (is_partner_fast != was_partner_fast) {
> +			/* Partners timeout  was slow and now it is fast -> send LACP.
> +			 * In other case (was fast and now it is slow) just switch
> +			 * timeout to slow without forcing send of LACP (because standard
> +			 * say so)*/
> +			if (!is_partner_fast)
> +				sm_flags |= SM_FLAGS_NTT;
> +		} else
> +			return; /* Nothing changed */
> +	}
> +
> +	/* Handle state transition to FAST/SLOW LACP timeout */
> +	if (is_partner_fast) {
> +		timeout = BOND_8023AD_FAST_PERIODIC_MS;
> +		sm_flags |= SM_FLAGS_PARTNER_SHORT_TIMEOUT;
> +	} else {
> +		timeout = BOND_8023AD_SLOW_PERIODIC_MS;
> +		sm_flags &= ~SM_FLAGS_PARTNER_SHORT_TIMEOUT;
> +	}
> +
> +	timer_set(&port->periodic_timer, timeout);
> +	port->sm_flags = sm_flags;
> +}
> +
> +/**
> + * Function handles mux state machine.
> + *
> + * Function implements Mux Machine from point 5.4.15 in 802.1AX documentation.
> + * It should be called periodically.
> + *
> + * @param port			Port to handle state machine.
> + */
> +static int
> +mux_machine(struct bond_dev_private *internals, uint8_t port_num)
> +{
> +	bool ntt = false;
> +	struct port *port = &internals->mode4.port_list[port_num];
> +
> +	/* Save current state for later use */
> +	const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
> +		STATE_COLLECTING;
> +
> +	/* Enter DETACHED state on BEGIN condition or from any other state if
> +	 * port was unselected */
> +	if (SM_FLAG(port, BEGIN) ||
> +			port->selected == UNSELECTED || (port->selected == STANDBY &&
> +				(port->actor_state & state_mask) != 0)) {
> +		/* detach mux from aggregator not used */
> +		port->actor_state &= ~state_mask;
> +		/* Set ntt to true if BEGIN condition or transition from any other state
> +		 * which is indicated that wait_while_timer was started */
> +		if (SM_FLAG(port, BEGIN) ||
> +				port->wait_while_timer != TIMER_NOT_STARTED) {
> +			SM_FLAG_SET(port, NTT);
> +			BOND_DEBUG("-> DETACHED\n");
> +		}
> +		timer_cancel(&port->wait_while_timer);
> +	}
> +
> +	if (port->wait_while_timer == TIMER_NOT_STARTED) {
> +		if (port->selected == SELECTED || port->selected == STANDBY) {
> +			timer_set(&port->wait_while_timer,
> +				BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS);
> +
> +			BOND_DEBUG("DETACHED -> WAITING\n");
> +		}
> +		/* Waiting state entered */
> +		return 0;
> +	}
> +
> +	/* Transit next state if port is ready */
> +	if (!timer_is_expired(&port->wait_while_timer))
> +		return 0;
> +
> +	if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) &&
> +		!PARTNER_STATE(port, SYNCHRONIZATION)) {
> +		/* If in COLLECTING or DISTRIBUTING state and partner becomes out of
> +		 * sync transit to ATACHED state.  */
> +		ACTOR_STATE_CLR(port, DISTRIBUTING);
> +		ACTOR_STATE_CLR(port, COLLECTING);
> +		/* Clear actor sync to activate transit ATACHED in condition bellow */
> +		ACTOR_STATE_CLR(port, SYNCHRONIZATION);
> +		BOND_DEBUG("Out of sync -> ATTACHED\n");
> +	} else if (!ACTOR_STATE(port, SYNCHRONIZATION)) {
> +		/* attach mux to aggregator */
> +		BOND_ASSERT((port->actor_state & (STATE_COLLECTING |
> +			STATE_DISTRIBUTING)) == 0);
> +		ACTOR_STATE_SET(port, SYNCHRONIZATION);
> +		ntt = true;
> +		BOND_DEBUG("ATTACHED Entered\n");
> +	} else if (!ACTOR_STATE(port, COLLECTING)) {
> +		/* Start collecting if in sync */
> +		if (PARTNER_STATE(port, SYNCHRONIZATION)) {
> +			BOND_DEBUG("ATTACHED -> COLLECTING\n");
> +			ACTOR_STATE_SET(port, COLLECTING);
> +		}
> +	} else if (ACTOR_STATE(port, COLLECTING)) {
> +		/* Check if partner is in COLLECTING state. If so this port can
> +		 * distribute frames to it */
> +		if (!ACTOR_STATE(port, DISTRIBUTING)) {
> +			if (PARTNER_STATE(port, COLLECTING)) {
> +				/* Enable  DISTRIBUTING if partner is collecting */
> +				ACTOR_STATE_SET(port, DISTRIBUTING);
> +				ntt = true;
> +				BOND_DEBUG("COLLECTING -> DISTRIBUTING\n");
> +			}
> +		} else {
> +			if (!PARTNER_STATE(port, COLLECTING)) {
> +				/* Disable DISTRIBUTING (enter COLLECTING state) if partner
> +				 * is not collecting */
> +				ACTOR_STATE_CLR(port, DISTRIBUTING);
> +				ntt = true;
> +				BOND_DEBUG("DISTRIBUTING -> COLLECTING\n");
> +			}
> +		}
> +	}
> +
> +	if (ntt != false)
> +		SM_FLAG_SET(port, NTT);
> +
> +	return ntt;
> +}
> +
> +
> +/**
> + * Function handles transmit state machine.
> + *
> + * Function implements Transmit Machine from point 5.4.16 in 802.1AX
> + * documentation.
> + *
> + * @param port
> + */
> +static void
> +tx_machine(struct rte_eth_dev *bond_dev, uint8_t port_num)
> +{
> +	struct bond_dev_private *internals = bond_dev->data->dev_private;
> +	struct port *port = &internals->mode4.port_list[port_num];
> +	struct mode8023ad_data *data = &internals->mode4;
> +
> +	struct slow_protocol_msg *msg;
> +	struct lacpdu_header *hdr;
> +	struct lacpdu *lacpdu;
> +
> +	/* If periodic timer is not running periodic machine is in NO PERIODIC and
> +	 * acording to 802.3ax standard tx machine should not transmit any frames
> +	 * and set ntt to false. */
> +	if (port->periodic_timer == TIMER_NOT_STARTED)
> +		SM_FLAG_CLR(port, NTT);
> +
> +	if (!SM_FLAG(port, NTT) || !timer_is_expired(&port->tx_machine_timer))
> +		return;
> +
> +	/* If all conditions are met construct packet to send */
> +	if (rte_ring_dequeue(data->free_ring, (void **)&msg) == -ENOBUFS) {
> +		BOND_DEBUG("tx_machine: no free_lacpdu_ring\n");
> +		return;
> +	}
> +
> +	msg->pkt = rte_pktmbuf_alloc(data->mbuf_pool);
> +	if (msg->pkt == NULL) {
> +		/* FIXME: temporal workaround, when packets are no freed by driver */
> +		struct bond_rx_queue *bd_rx_q;
> +		uint8_t i;
> +
> +		for (i = 0; i < bond_dev->data->nb_rx_queues; i++) {
> +			bd_rx_q = (struct bond_rx_queue *)bond_dev->data->rx_queues[i];
> +			BOND_ASSERT(bd_rx_q != NULL && bd_rx_q->mb_pool != NULL);
> +			/* Do not use SP or SC pools as this is unsafe. */
> +			if (bd_rx_q->mb_pool->flags & (MEMPOOL_F_SC_GET | MEMPOOL_F_SP_PUT))
> +				continue;
> +
> +			msg->pkt = rte_pktmbuf_alloc(bd_rx_q->mb_pool);
> +			if (msg->pkt) {
> +				RTE_LOG(WARNING, PMD, "Failed to allocate LACP from mode4 pool."
> +				"Packet was allocated from pool of rx queue %u\n", i);
> +				break;
> +			}
> +		}
> +
> +		if (msg->pkt == NULL) {
> +			rte_ring_enqueue(data->free_ring, msg);
> +			RTE_LOG(ERR, PMD, "Failed to allocate LACP packet from pool\n");
> +			return;
> +		}
> +	}
> +	msg->port_id = internals->active_slaves[port_num];
> +	hdr = rte_pktmbuf_mtod(msg->pkt, struct lacpdu_header *);
> +
> +	msg->pkt->pkt.data_len = sizeof(*hdr);
> +	msg->pkt->pkt.pkt_len = sizeof(*hdr);
> +	/* Source and destination MAC */
> +	ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr);
> +	mac_address_get(bond_dev, &hdr->eth_hdr.s_addr);
> +	hdr->eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_SLOW);
> +
> +	port = &data->port_list[port_num];
> +	lacpdu = &hdr->lacpdu;
> +	memset(lacpdu, 0, sizeof(*lacpdu));
> +
> +	/* Initialize LACP part */
> +	lacpdu->subtype = SUBTYPE_LACP;
> +	lacpdu->version_number = 1;
> +
> +	/* ACTOR */
> +	lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION;
> +	lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params);
> +	memcpy(&hdr->lacpdu.actor.port_params, &port->actor,
> +			sizeof(port->actor));
> +	lacpdu->actor.state = port->actor_state;
> +
> +	/* PARTNER */
> +	lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION;
> +	lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params);
> +	memcpy(&lacpdu->partner.port_params, &port->partner,
> +			sizeof(struct port_params));
> +	lacpdu->partner.state = port->partner_state;
> +
> +	/* Other fields */
> +	lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION;
> +	lacpdu->collector_info_length = 0x10;
> +	lacpdu->collector_max_delay = 0;
> +
> +	lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION;
> +	lacpdu->terminator_length = 0;
> +
> +	if (rte_ring_enqueue(data->tx_ring, msg) == -ENOBUFS) {
> +		/* If TX ring full, drop packet and free message. Retransmission
> +		 * will happen in next function call. */
> +		rte_pktmbuf_free(msg->pkt);
> +		rte_ring_enqueue(data->free_ring, msg);
> +
> +		RTE_LOG(ERR, PMD, "Failed to enqueue LACP packet into tx ring");
> +		return;
> +	}
> +
> +	BOND_DEBUG("sending LACP frame\n");
> +	BOND_PRINT_LACP(lacpdu);
> +
> +	SM_FLAG_CLR(port, NTT);
> +	/* Add 10% random backoff time to better distrube slow packets
> +	 * between tx bursts. */
> +	timer_set(&port->tx_machine_timer, BOND_8023AD_TX_PERIOD_MS +
> +		rand() % ((BOND_8023AD_TX_PERIOD_MS * 10) / 100));
> +}
> +
> +/**
> + * Function assigns port to aggregator.
> + *
> + * @param bond_dev_private	Pointer to bond_dev_private structure.
> + * @param port_pos			Port to assign.
> + */
> +static void
> +selection_logic(struct bond_dev_private *internals, uint8_t port_num)
> +{
> +	struct mode8023ad_data *data = &internals->mode4;
> +	struct port *agg, *port, *port_list;
> +	uint8_t ports_count;
> +	uint8_t i;
> +
> +	ports_count = internals->slave_count;
> +	port_list = data->port_list;
> +	port = &port_list[port_num];
> +
> +	/* Skip port if it is selected */
> +	if (port->selected == SELECTED)
> +		return;
> +
> +	/* Search for aggregator suitable for this port */
> +	for (i = 0; i < ports_count; ++i) {
> +		agg = &port_list[i];
> +		/* Skip ports that are not aggreagators */
> +		if (agg->used_agregator_idx != i && i == port_num)
> +			continue;
> +
> +		/* Actors system ID is not checked since all slave device have the same
> +		 * ID (MAC address). */
> +		if ((agg->actor.key == port->actor.key &&
> +			agg->partner.system_priority == port->partner.system_priority &&
> +			is_same_ether_addr(&agg->partner.system, &port->partner.system) == 1
> +			&& (agg->partner.key == port->partner.key)) &&
> +			is_zero_ether_addr(&port->partner.system) != 1 &&
> +			(agg->actor.key &
> +				rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) != 0) {
> +
> +			port->used_agregator_idx = i;
> +			break;
> +		}
> +	}
> +
> +	/* By default, port uses it self as agregator */
> +	if (i == ports_count)
> +		port->used_agregator_idx = port_num;
> +
> +	port->selected = SELECTED;
> +
> +	BOND_DEBUG("-> SELECTED: ID=%3u pos=%3u\n"
> +		"\t%s ID=%3u pos=%3u\n",
> +		internals->active_slaves[port_num], port_num,
> +		port->used_agregator_idx == port_num ?
> +			"agregator not found, using default" : "agregator found",
> +		port->used_agregator_idx,
> +		internals->active_slaves[port->used_agregator_idx]);
> +}
> +
> +/**
> + * Helper function which updates current port
> + */
> +static void
> +update_mux_slaves(struct bond_dev_private *internals)
> +{
> +	struct mode8023ad_data *data = &internals->mode4;
> +	struct port *port;
> +	uint8_t current[RTE_MAX_ETHPORTS];
> +	uint8_t count = 0;
> +	uint8_t i;
> +
> +	for (i = 0; i < internals->slave_count; i++) {
> +		port = &data->port_list[i];
> +		if (ACTOR_STATE(port, DISTRIBUTING))
> +			current[count++] = i;
> +	}
> +
> +	memcpy(data->distibuting_slaves, current, sizeof(current[0]) * count);
> +	data->distibuting_slaves_count = count;
> +}
> +
> +/* Function maps DPDK speed to bonding speed stored in key field */
> +static uint16_t
> +link_speed_key(uint16_t speed) {
> +	uint16_t key_speed;
> +
> +	switch (speed) {
> +	case ETH_LINK_SPEED_AUTONEG:
> +		key_speed = 0x00;
> +		break;
> +	case ETH_LINK_SPEED_10:
> +		key_speed = BOND_LINK_SPEED_KEY_10M;
> +		break;
> +	case ETH_LINK_SPEED_100:
> +		key_speed = BOND_LINK_SPEED_KEY_100M;
> +		break;
> +	case ETH_LINK_SPEED_1000:
> +		key_speed = BOND_LINK_SPEED_KEY_1000M;
> +		break;
> +	case ETH_LINK_SPEED_10G:
> +		key_speed = BOND_LINK_SPEED_KEY_10G;
> +		break;
> +	case ETH_LINK_SPEED_20G:
> +		key_speed = BOND_LINK_SPEED_KEY_20G;
> +		break;
> +	case ETH_LINK_SPEED_40G:
> +		key_speed = BOND_LINK_SPEED_KEY_40G;
> +		break;
> +	default:
> +		/* Unknown speed*/
> +		key_speed = 0xFFFF;
> +	}
> +
> +	return key_speed;
> +}
> +
> +static void
> +bond_mode_8023ad_periodic_cb(void *arg)
> +{
> +	struct rte_eth_dev *bond_dev = arg;
> +	struct bond_dev_private *internals = bond_dev->data->dev_private;
> +	struct mode8023ad_data *data = &internals->mode4;
> +
> +	struct slow_protocol_frame *slow_hdr;
> +	struct rte_eth_link link_info;
> +
> +	struct slow_protocol_msg *msgs[BOND_MODE_8023AX_RX_RING_SIZE];
> +	uint16_t port_num, j, nb_msgs;
> +	/* if not 0 collecting/distibuting array need update */
> +	uint16_t slaves_changed = 0;
> +	bool machines_invoked;
> +
> +	/* Update link status on each port */
> +	for (port_num = 0; port_num < internals->active_slave_count; port_num++) {
> +		uint8_t key;
> +
> +		rte_eth_link_get(internals->active_slaves[port_num], &link_info);
> +		if (link_info.link_status != 0) {
> +			key = link_speed_key(link_info.link_speed) << 1;
> +			if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX)
> +				key |= BOND_LINK_FULL_DUPLEX_KEY;
> +		} else
> +			key = 0;
> +
> +		data->port_list[port_num].actor.key = rte_cpu_to_be_16(key);
> +	}
> +
> +	nb_msgs = (uint16_t)rte_ring_dequeue_burst(data->rx_ring, (void **) msgs,
> +		BOND_MODE_8023AX_RX_RING_SIZE);
> +
> +	for (port_num = 0; port_num < internals->active_slave_count; port_num++) {
> +		struct port *port = &data->port_list[port_num];
> +		if ((port->actor.key &
> +				rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) {
> +
> +			SM_FLAG_SET(port, BEGIN);
> +
> +			/* LACP is disabled on half duples or link is down */
> +			if (SM_FLAG(port, LACP_ENABLED)) {
> +				/* If port was enabled set it to BEGIN state */
> +				SM_FLAG_CLR(port, LACP_ENABLED);
> +				ACTOR_STATE_CLR(port, DISTRIBUTING);
> +				ACTOR_STATE_CLR(port, COLLECTING);
> +				slaves_changed++;
> +			}
> +
> +			BOND_DEBUG("Port %u is not LACP capable!\n",
> +				internals->active_slaves[port_num]);
> +			/* Skip this port processing */
> +			continue;
> +		}
> +
> +		SM_FLAG_SET(port, LACP_ENABLED);
> +		machines_invoked = false;
> +		/* Find LACP packet */
> +		for (j = 0; j < nb_msgs; j++) {
> +			if (msgs[j] == NULL || msgs[j]->port_id !=
> +					internals->active_slaves[port_num])
> +				continue;
> +
> +			slow_hdr = rte_pktmbuf_mtod(msgs[j]->pkt,
> +					struct slow_protocol_frame *);
> +
> +			if (slow_hdr->slow_protocol.subtype == SLOW_SUBTYPE_LACP) {
> +				/* This is LACP frame so pass it to rx_machine */
> +				struct lacpdu *lacp = (struct lacpdu *)&slow_hdr->slow_protocol;
> +				/* Invoke state machines on every active slave port */
> +				rx_machine(internals, port_num, lacp);
> +				periodic_machine(internals, port_num);
> +				slaves_changed += mux_machine(internals, port_num);
> +				tx_machine(bond_dev, port_num);
> +				selection_logic(internals, port_num);
> +
> +				machines_invoked = true;
> +			} else if (slow_hdr->slow_protocol.subtype == SLOW_SUBTYPE_MARKER) {
> +				struct marker *marker;
> +
> +				marker = (struct marker *) &slow_hdr->slow_protocol;
> +				if (marker->tlv_type_marker == MARKER_TLV_TYPE_MARKER_INFO) {
> +					/* Reuse received packet to send frame to Marker Responder
> +					 */
> +					marker->tlv_type_marker = MARKER_TLV_TYPE_MARKER_RESP;
> +
> +					/* Update source MAC, destination MAC is multicast so we
> +					 * don't update it */
> +					mac_address_get(bond_dev, &slow_hdr->eth_hdr.s_addr);
> +
> +					if (rte_ring_enqueue(data->tx_ring, msgs[j]) == -ENOBUFS) {
> +						RTE_LOG(ERR, PMD,
> +						"Failed to enqueue packet into tx ring");
> +						rte_pktmbuf_free(msgs[j]->pkt);
> +						rte_ring_enqueue(data->free_ring, msgs[j]);
> +					}
> +
> +					msgs[j] = NULL;
> +				}
> +			}
> +		}
> +
> +		if (machines_invoked == false) {
> +			rx_machine(internals, port_num, NULL);
> +			periodic_machine(internals, port_num);
> +			slaves_changed += mux_machine(internals, port_num);
> +			tx_machine(bond_dev, port_num);
> +			selection_logic(internals, port_num);
> +			machines_invoked = true;
> +		}
> +
> +		SM_FLAG_CLR(port, BEGIN);
> +	}
> +
> +	/* Update mux if something changed */
> +	if (slaves_changed > 0) {
> +		update_mux_slaves(internals);
> +		BOND_DEBUG("mux count %u [%2u%s%2u%s%2u%s%2u%s%s]\n",
> +			data->distibuting_slaves_count,
> +			data->distibuting_slaves[0],
> +			data->distibuting_slaves_count > 0 ? " " : "\b\b",
> +			data->distibuting_slaves[1],
> +			data->distibuting_slaves_count > 1 ? " " : "\b\b",
> +			data->distibuting_slaves[2],
> +			data->distibuting_slaves_count > 2 ? " " : "\b\b",
> +			data->distibuting_slaves[3],
> +			data->distibuting_slaves_count > 3 ? " " : "\b\b",
> +			data->distibuting_slaves_count > 4 ? "..." : "");
> +	}
> +
> +	/* Free packets that was not reused */
> +	for (port_num = 0; port_num < nb_msgs; port_num++) {
> +		if (msgs[port_num] != NULL) {
> +			rte_pktmbuf_free(msgs[port_num]->pkt);
> +			rte_ring_enqueue(data->free_ring, msgs[port_num]);
> +		}
> +	}
> +
> +	rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000,
> +			bond_mode_8023ad_periodic_cb, arg);
> +}
> +
> +static void
> +bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, uint8_t num)
> +{
> +	struct bond_dev_private *internals = bond_dev->data->dev_private;
> +	struct mode8023ad_data *data = &internals->mode4;
> +
> +	struct port *port = &data->port_list[internals->active_slave_count];
> +	struct port_params initial = {
> +			.system = { { 0 } },
> +			.system_priority = rte_cpu_to_be_16(0xFFFF),
> +			.key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY),
> +			.port_priority = rte_cpu_to_be_16(0x00FF),
> +			.port_number = 0,
> +	};
> +
> +	memcpy(&port->actor, &initial, sizeof(struct port_params));
> +	mac_address_get(bond_dev, &port->actor.system);
> +	port->actor.port_number =
> +		slave_id_to_port_number(internals->active_slaves[num]);
> +
> +	memcpy(&port->partner, &initial, sizeof(struct port_params));
> +
> +	/* default states */
> +	port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED;
> +	port->partner_state = STATE_LACP_ACTIVE;
> +	port->sm_flags = SM_FLAGS_BEGIN;
> +
> +	/* use this port as agregator */
> +	port->used_agregator_idx = num;
> +}
> +
> +void
> +bond_mode_8023ad_slave_append(struct rte_eth_dev *bond_dev)
> +{
> +	struct bond_dev_private *internals = bond_dev->data->dev_private;
> +
> +	bond_mode_8023ad_activate_slave(bond_dev, internals->active_slave_count);
> +}
> +
> +int
> +bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev,
> +		uint8_t slave_pos)
> +{
> +	struct bond_dev_private *internals = bond_dev->data->dev_private;
> +	struct mode8023ad_data *data = &internals->mode4;
> +	struct port *port;
> +	uint8_t i;
> +
> +	bond_mode_8023ad_stop(bond_dev);
> +
> +	/* Exclude slave from transmit policy. If this slave is an aggregator
> +	 * make all aggregated slaves unselected to force sellection logic
> +	 * to select suitable aggregator for this port	 */
> +	for (i = 0; i < internals->active_slave_count; i++) {
> +		port = &data->port_list[slave_pos];
> +		if (port->used_agregator_idx == slave_pos) {
> +			port->selected = UNSELECTED;
> +			port->actor_state &= ~(STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
> +				STATE_COLLECTING);
> +
> +			/* Use default aggregator */
> +			port->used_agregator_idx = i;
> +		}
> +	}
> +
> +	port = &data->port_list[slave_pos];
> +	timer_cancel(&port->current_while_timer);
> +	timer_cancel(&port->periodic_timer);
> +	timer_cancel(&port->wait_while_timer);
> +	timer_cancel(&port->tx_machine_timer);
> +
These all seem rather racy.  Alarm callbacks are executed with the alarm list
locks not held, so there is every possibility that you could execute these (or
any timer_cancel calls in this PMD) in parallel with the internal state machine
timer callback, and leave either with a corrupted timer list (resulting from a
double free between here and the actual callback site) or a timer that is
actually still pending when a slave is removed.
  
Wodkowski, PawelX Sept. 18, 2014, 8:07 a.m. UTC | #2
> > +int
> > +bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev,
> > +		uint8_t slave_pos)
> > +{
> > +	struct bond_dev_private *internals = bond_dev->data->dev_private;
> > +	struct mode8023ad_data *data = &internals->mode4;
> > +	struct port *port;
> > +	uint8_t i;
> > +
> > +	bond_mode_8023ad_stop(bond_dev);
> > +
> > +	/* Exclude slave from transmit policy. If this slave is an aggregator
> > +	 * make all aggregated slaves unselected to force sellection logic
> > +	 * to select suitable aggregator for this port	 */
> > +	for (i = 0; i < internals->active_slave_count; i++) {
> > +		port = &data->port_list[slave_pos];
> > +		if (port->used_agregator_idx == slave_pos) {
> > +			port->selected = UNSELECTED;
> > +			port->actor_state &= ~(STATE_SYNCHRONIZATION |
> STATE_DISTRIBUTING |
> > +				STATE_COLLECTING);
> > +
> > +			/* Use default aggregator */
> > +			port->used_agregator_idx = i;
> > +		}
> > +	}
> > +
> > +	port = &data->port_list[slave_pos];
> > +	timer_cancel(&port->current_while_timer);
> > +	timer_cancel(&port->periodic_timer);
> > +	timer_cancel(&port->wait_while_timer);
> > +	timer_cancel(&port->tx_machine_timer);
> > +
> These all seem rather racy.  Alarm callbacks are executed with the alarm list
> locks not held.  So there is every possibility that you could execute these (or
> any timer_cancel calls in this PMD in parallel with the internal state machine
> timer callback, and leave either with a corrupted timer list (resulting from a
> double free between here, and the actual callback site),

I don't think so. Yes, callbacks are executed with the alarm list locks not held,
but this is not the issue, because access to the list itself is guarded by the
lock and the ap->executing variable, so the list will not be trashed. Check the
source of eal_alarm_callback(), rte_eal_alarm_set() and rte_eal_alarm_cancel().

> or a timer that is
> actually still pending when a slave is removed.
> 
This is not the issue either, but the problem might be similar. I assumed that
alarms are atomic, but when I looked at the rte alarms more closely I saw a race
condition between rte_eal_alarm_cancel() from bond_mode_8023ad_stop()
and rte_eal_alarm_set() from the state machine callback. This needs to be
reworked in some way.
  
Neil Horman Sept. 18, 2014, 4:02 p.m. UTC | #3
On Thu, Sep 18, 2014 at 08:07:31AM +0000, Wodkowski, PawelX wrote:
> > > +int
> > > +bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev,
> > > +		uint8_t slave_pos)
> > > +{
> > > +	struct bond_dev_private *internals = bond_dev->data->dev_private;
> > > +	struct mode8023ad_data *data = &internals->mode4;
> > > +	struct port *port;
> > > +	uint8_t i;
> > > +
> > > +	bond_mode_8023ad_stop(bond_dev);
> > > +
> > > +	/* Exclude slave from transmit policy. If this slave is an aggregator
> > > +	 * make all aggregated slaves unselected to force sellection logic
> > > +	 * to select suitable aggregator for this port	 */
> > > +	for (i = 0; i < internals->active_slave_count; i++) {
> > > +		port = &data->port_list[slave_pos];
> > > +		if (port->used_agregator_idx == slave_pos) {
> > > +			port->selected = UNSELECTED;
> > > +			port->actor_state &= ~(STATE_SYNCHRONIZATION |
> > STATE_DISTRIBUTING |
> > > +				STATE_COLLECTING);
> > > +
> > > +			/* Use default aggregator */
> > > +			port->used_agregator_idx = i;
> > > +		}
> > > +	}
> > > +
> > > +	port = &data->port_list[slave_pos];
> > > +	timer_cancel(&port->current_while_timer);
> > > +	timer_cancel(&port->periodic_timer);
> > > +	timer_cancel(&port->wait_while_timer);
> > > +	timer_cancel(&port->tx_machine_timer);
> > > +
> > These all seem rather racy.  Alarm callbacks are executed with the alarm list
> > locks not held.  So there is every possibility that you could execute these (or
> > any timer_cancel calls in this PMD in parallel with the internal state machine
> > timer callback, and leave either with a corrupted timer list (resulting from a
> > double free between here, and the actual callback site),
> 
> I don't think so. Yes, callbacks are executed with  alarm list locks not held, but 
> this is not the issue because access to list itself is guarded by lock and 
> ap->executing variable. So list will not be trashed. Check source of 
> eal_alarm_callback(), rte_eal_alarm_set() and rte_eal_alarm_cancel().
> 
Yes, you're right, the list is probably safe with the executing bit.

> > or a timer that is
> > actually still pending when a slave is removed.
> > 
> This is not the issue also, but problem might be similar. I assumed that alarms
> are atomic but when I looked at rte alarms closer I saw a race condition
> between and rte_eal_alarm_cancel() from  bond_mode_8023ad_stop()
> and rte_eal_alarm_set() from state machines callback. This need to be 
> reworked in some way.

Yes, this is what I was referring to:

CPU0				CPU1
rte_eal_alarm_callback		bond_8023ad_deactivate_slave
-bond_8023_ad_periodic_cb	timer_cancel
timer_set

If those timer functions operate on the same timer, the result is that you can
leave the stop/deactivate slave paths with a timer function for that slave still
pending. The bonding mode needs some internal state to serialize those
operations and determine if the timer should be reactivated.

Neil
  
Wodkowski, PawelX Sept. 19, 2014, 12:47 p.m. UTC | #4
> -----Original Message-----
> From: Neil Horman [mailto:nhorman@tuxdriver.com]
> Sent: Thursday, September 18, 2014 18:03
> To: Wodkowski, PawelX
> Cc: dev@dpdk.org; Jastrzebski, MichalX K; Doherty, Declan
> Subject: Re: [dpdk-dev] [PATCH 2/2] bond: add mode 4 support
> 
> On Thu, Sep 18, 2014 at 08:07:31AM +0000, Wodkowski, PawelX wrote:
> > > > +int
> > > > +bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev,
> > > > +		uint8_t slave_pos)
> > > > +{
> > > > +	struct bond_dev_private *internals = bond_dev->data->dev_private;
> > > > +	struct mode8023ad_data *data = &internals->mode4;
> > > > +	struct port *port;
> > > > +	uint8_t i;
> > > > +
> > > > +	bond_mode_8023ad_stop(bond_dev);
> > > > +
> > > > +	/* Exclude slave from transmit policy. If this slave is an aggregator
> > > > +	 * make all aggregated slaves unselected to force sellection logic
> > > > +	 * to select suitable aggregator for this port	 */
> > > > +	for (i = 0; i < internals->active_slave_count; i++) {
> > > > +		port = &data->port_list[slave_pos];
> > > > +		if (port->used_agregator_idx == slave_pos) {
> > > > +			port->selected = UNSELECTED;
> > > > +			port->actor_state &= ~(STATE_SYNCHRONIZATION |
> > > STATE_DISTRIBUTING |
> > > > +				STATE_COLLECTING);
> > > > +
> > > > +			/* Use default aggregator */
> > > > +			port->used_agregator_idx = i;
> > > > +		}
> > > > +	}
> > > > +
> > > > +	port = &data->port_list[slave_pos];
> > > > +	timer_cancel(&port->current_while_timer);
> > > > +	timer_cancel(&port->periodic_timer);
> > > > +	timer_cancel(&port->wait_while_timer);
> > > > +	timer_cancel(&port->tx_machine_timer);
> > > > +
> > > These all seem rather racy.  Alarm callbacks are executed with the alarm list
> > > locks not held.  So there is every possibility that you could execute these (or
> > > any timer_cancel calls in this PMD in parallel with the internal state machine
> > > timer callback, and leave either with a corrupted timer list (resulting from a
> > > double free between here, and the actual callback site),
> >
> > I don't think so. Yes, callbacks are executed with  alarm list locks not held, but
> > this is not the issue because access to list itself is guarded by lock and
> > ap->executing variable. So list will not be trashed. Check source of
> > eal_alarm_callback(), rte_eal_alarm_set() and rte_eal_alarm_cancel().
> >
> Yes, you're right, the list is probably safe wht the executing bit.
> 
> > > or a timer that is
> > > actually still pending when a slave is removed.
> > >
> > This is not the issue also, but problem might be similar. I assumed that alarms
> > are atomic but when I looked at rte alarms closer I saw a race condition
> > between and rte_eal_alarm_cancel() from  bond_mode_8023ad_stop()
> > and rte_eal_alarm_set() from state machines callback. This need to be
> > reworked in some way.
> 
> Yes, this is what I was referring to:
> 
> CPU0				CPU1
> rte_eal_alarm_callback		bond_8023ad_deactivate_slave
> -bond_8023_ad_periodic_cb	timer_cancel
> timer_set
> 
> If those timer functions operate on the same timer, the result is that you can
> leave the stop/deactivate slave paths with a timer function for that slave still
> pending. The bonding mode needs some internal state to serialize those
> operations and determine if the timer should be reactivated.
> 
> Neil

I did rethink the issue, and the problem is much simpler than it looks. I did the
following:
1. Change the internal state machine alarms to use rte_rdtsc(). This makes all
mode 4 internal timer_*() functions unaffected by any race condition.
2. Do a busy loop when canceling the main callback timer until the cancel is
successful.
This should do the trick for the race condition. Do you agree?

Here is the part involving timers that I have changed:

static void
-timer_expired_cb(void *arg)
+timer_stop(uint64_t *timer)
 {
-	enum timer_state *timer = arg;
-
-	BOND_ASSERT(*timer == TIMER_RUNNING);
-	*timer = TIMER_EXPIRED;
+	*timer = 0;
 }
 
 static void
-timer_cancel(enum timer_state *timer)
+timer_set(uint64_t *timer, uint64_t timeout_ms)
 {
-	rte_eal_alarm_cancel(&timer_expired_cb, timer);
-	*timer = TIMER_NOT_STARTED;
+	*timer = rte_rdtsc() + timeout_ms * rte_get_tsc_hz() / 1000;
 }
 
+/* Forces given timer to be in expired state. */
 static void
-timer_set(enum timer_state *timer, uint64_t timeout)
+timer_force_expired(uint64_t *timer)
 {
-	rte_eal_alarm_cancel(&timer_expired_cb, timer);
-	rte_eal_alarm_set(timeout * 1000, &timer_expired_cb, timer);
-	*timer = TIMER_RUNNING;
+	*timer = rte_rdtsc();
 }
 
 static bool
-timer_is_expired(enum timer_state *timer)
+timer_is_stopped(uint64_t *timer)
 {
-	return *timer == TIMER_EXPIRED;
+	return *timer == 0;
+}
+
+/* Timer is in running state if it is not stopped nor expired */
+static bool
+timer_is_running(uint64_t *timer)
+{
+	return *timer > 0 && rte_rdtsc() < *timer;
+}
+
+
+static bool
+timer_is_expired(uint64_t *timer)
+{
+	return *timer <= rte_rdtsc();
 }
---

And here is the part that stops the mode 4 callback:

-int
+void
 bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
 {
-	rte_eal_alarm_cancel(bond_mode_8023ad_periodic_cb, bond_dev);
-	return 0;
+	/* Loop until we cancel the pending alarm. An alarm that is executing will
+	 * not be canceled, but when it reschedules itself it will be canceled. */
+	while (rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev) == 0)
+		rte_pause();
 }
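
As a hedged usage sketch (not part of the posted diff), the state machines would
then only poll these deadlines from inside the single periodic callback, assuming
port->periodic_timer becomes a uint64_t TSC deadline as in the diff above:

/* Illustrative only: the deadline is just a uint64_t compared against
 * rte_rdtsc(); no EAL alarm is armed per state-machine timer, so there is
 * nothing left to cancel asynchronously. */
static void
periodic_machine_sketch(struct port *port)
{
	if (timer_is_stopped(&port->periodic_timer) ||
			timer_is_expired(&port->periodic_timer)) {
		/* ... do the periodic work, then re-arm ... */
		timer_set(&port->periodic_timer, BOND_8023AD_SLOW_PERIODIC_MS);
	}
}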
  
Neil Horman Sept. 19, 2014, 5:29 p.m. UTC | #5
On Fri, Sep 19, 2014 at 12:47:35PM +0000, Wodkowski, PawelX wrote:
> > -----Original Message-----
> > From: Neil Horman [mailto:nhorman@tuxdriver.com]
> > Sent: Thursday, September 18, 2014 18:03
> > To: Wodkowski, PawelX
> > Cc: dev@dpdk.org; Jastrzebski, MichalX K; Doherty, Declan
> > Subject: Re: [dpdk-dev] [PATCH 2/2] bond: add mode 4 support
> > 
> > On Thu, Sep 18, 2014 at 08:07:31AM +0000, Wodkowski, PawelX wrote:
> > > > > +int
> > > > > +bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev,
> > > > > +		uint8_t slave_pos)
> > > > > +{
> > > > > +	struct bond_dev_private *internals = bond_dev->data->dev_private;
> > > > > +	struct mode8023ad_data *data = &internals->mode4;
> > > > > +	struct port *port;
> > > > > +	uint8_t i;
> > > > > +
> > > > > +	bond_mode_8023ad_stop(bond_dev);
> > > > > +
> > > > > +	/* Exclude slave from transmit policy. If this slave is an aggregator
> > > > > +	 * make all aggregated slaves unselected to force sellection logic
> > > > > +	 * to select suitable aggregator for this port	 */
> > > > > +	for (i = 0; i < internals->active_slave_count; i++) {
> > > > > +		port = &data->port_list[slave_pos];
> > > > > +		if (port->used_agregator_idx == slave_pos) {
> > > > > +			port->selected = UNSELECTED;
> > > > > +			port->actor_state &= ~(STATE_SYNCHRONIZATION |
> > > > STATE_DISTRIBUTING |
> > > > > +				STATE_COLLECTING);
> > > > > +
> > > > > +			/* Use default aggregator */
> > > > > +			port->used_agregator_idx = i;
> > > > > +		}
> > > > > +	}
> > > > > +
> > > > > +	port = &data->port_list[slave_pos];
> > > > > +	timer_cancel(&port->current_while_timer);
> > > > > +	timer_cancel(&port->periodic_timer);
> > > > > +	timer_cancel(&port->wait_while_timer);
> > > > > +	timer_cancel(&port->tx_machine_timer);
> > > > > +
> > > > These all seem rather racy.  Alarm callbacks are executed with the alarm list
> > > > locks not held.  So there is every possibility that you could execute these (or
> > > > any timer_cancel calls in this PMD in parallel with the internal state machine
> > > > timer callback, and leave either with a corrupted timer list (resulting from a
> > > > double free between here, and the actual callback site),
> > >
> > > I don't think so. Yes, callbacks are executed with  alarm list locks not held, but
> > > this is not the issue because access to list itself is guarded by lock and
> > > ap->executing variable. So list will not be trashed. Check source of
> > > eal_alarm_callback(), rte_eal_alarm_set() and rte_eal_alarm_cancel().
> > >
> > Yes, you're right, the list is probably safe wht the executing bit.
> > 
> > > > or a timer that is
> > > > actually still pending when a slave is removed.
> > > >
> > > This is not the issue also, but problem might be similar. I assumed that alarms
> > > are atomic but when I looked at rte alarms closer I saw a race condition
> > > between and rte_eal_alarm_cancel() from  bond_mode_8023ad_stop()
> > > and rte_eal_alarm_set() from state machines callback. This need to be
> > > reworked in some way.
> > 
> > Yes, this is what I was referring to:
> > 
> > CPU0				CPU1
> > rte_eal_alarm_callback		bond_8023ad_deactivate_slave
> > -bond_8023_ad_periodic_cb	timer_cancel
> > timer_set
> > 
> > If those timer functions operate on the same timer, the result is that you can
> > leave the stop/deactivate slave paths with a timer function for that slave still
> > pending. The bonding mode needs some internal state to serialize those
> > operations and determine if the timer should be reactivated.
> > 
> > Neil
> 
> I did rethink the issue and problem is much simpler than it looks like. I did the 
> following:
> 1. Change internal state machine alarms to use rte_rdtsc(). This makes all 
>  mode 4 internal timer_*() function not affected by any race condition.
> 2. Do a busy loop when canceling main callback timer until cancel is successfull.
> This should do the trick about race condition. Do you agree?
> 
I think that will work, but I believe you're making it more complicated (and
less reusable) than it needs to be.  What I think you really need to do is
create a new rte API call, rte_eal_alarm_cancel_sync (something like the
equivalent of del_timer_sync in Linux), that wraps up the
while (rte_eal_alarm_cancel(...) == 0) { rte_pause(); } loop in its own function
so other call sites can use it, as I don't think this is an uncommon problem.
Then just create a bonding-internal state flag to signal the periodic callback
that it shouldn't re-arm the timer.  That way all you have to do is set the flag
and call rte_eal_alarm_cancel_sync, and you're done.  And other applications
will be able to handle this common type of operation as well.

Neil
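
A minimal sketch of the wrapper described above, assuming the proposed
rte_eal_alarm_cancel_sync name (not an existing EAL call at this point) and the
periodic callback from the patch:

#include <rte_alarm.h>
#include <rte_cycles.h>	/* rte_pause(), as already used in the posted diff */

/* Proposed helper: keep trying to cancel until a cancel takes effect. An
 * instance of the callback that is already executing is not cancelled, but
 * the alarm it re-arms on exit will be. */
static inline void
rte_eal_alarm_cancel_sync(rte_eal_alarm_callback cb_fn, void *cb_arg)
{
	while (rte_eal_alarm_cancel(cb_fn, cb_arg) == 0)
		rte_pause();
}

/* The bonding stop path then reduces to: */
void
bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
{
	rte_eal_alarm_cancel_sync(&bond_mode_8023ad_periodic_cb, bond_dev);
}

Note that this busy-wait form only terminates because bond_mode_8023ad_periodic_cb()
unconditionally re-arms itself; the re-arm-suppressing flag mentioned above would
additionally need the EAL helper to detect that an executing callback has finished
(as del_timer_sync does), rather than relying on a fresh alarm appearing to cancel.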
  
Wodkowski, PawelX Sept. 22, 2014, 6:26 a.m. UTC | #6
> I think that will work, but I believe you're making it more complicated (and
> less reusable) than it needs to be.  What I think you really need to do is
> create a new rte api call, rte_eal_alarm_cancel_sync (something like the
> equivalent of del_timer_sync in linux, that wraps up the
> while(rte_eal_alarm_cancel(...) == 0) {rte_pause} in its own function (so other
> call sites can use it, as I don't think this is an uncommon problem), Then just
> create a bonding-internal state flag to signal the periodic callback that it
> shouldn't re-arm the timer.  That way all you have to do is set the flag, and
> call rte_eal_alarm_cancel_sync, and you're done.  And other applications will be
> able to handle this common type of operation as well
> 
> Neil

I agree with you that the alarms should be upgraded, but this needs to be
discussed and tested. There is no time for that now.

Thank you
Pawel
  
Neil Horman Sept. 22, 2014, 10:24 a.m. UTC | #7
On Mon, Sep 22, 2014 at 06:26:21AM +0000, Wodkowski, PawelX wrote:
> > I think that will work, but I believe you're making it more complicated (and
> > less reusable) than it needs to be.  What I think you really need to do is
> > create a new rte api call, rte_eal_alarm_cancel_sync (something like the
> > equivalent of del_timer_sync in linux, that wraps up the
> > while(rte_eal_alarm_cancel(...) == 0) {rte_pause} in its own function (so other
> > call sites can use it, as I don't think this is an uncommon problem), Then just
> > create a bonding-internal state flag to signal the periodic callback that it
> > shouldn't re-arm the timer.  That way all you have to do is set the flag, and
> > call rte_eal_alarm_cancel_sync, and you're done.  And other applications will be
> > able to handle this common type of operation as well
> > 
> > Neil
> 
> I agree with you that alarms should be upgraded but this need to discussed and 
> tested. Now there is not time for that.
> 
Nak, that's a completely broken argument.  I've just demonstrated to you a race
condition in the driver you are submitting.  Granted, it stems from a design
limitation in the alarm subsystem, but it's what we all have to work with, and we
can work around it and make the driver safe.  To say there is "no time" to fix
it implies to me that you care more about just slamming your code in than
making anything work properly.  What exactly is your rush that makes you think
it's more important for the code to be merged than fixing it to work correctly?
Neil
  
Neil Horman Sept. 22, 2014, 1:15 p.m. UTC | #8
On Mon, Sep 22, 2014 at 11:59:33AM +0000, Jastrzebski, MichalX K wrote:
> Hi Neil, 
> I agree with you that the most important thing is to release code that works without errors, and this is what we are all working on. Pawel's answer "no time" doesn't sound good here (he meant something else) - I assure you that Pawel cares a lot about releasing very good code. He proposes a solution that fixes this race for the 1.8 release. Implementing a new rte API call will take more time, because this is new functionality for the eal_timer library, and it seems there is not much time left for that. Having said that, should we abandon the hotfix and focus on the long-term solution?
> 

Yes, I think the proper solution should be the one we shoot for here, though in
re-reading my response, perhaps I wasn't as clear as I could have been.  All I'm
really advocating here is that the while(...) { rte_pause() } loop that Pawel's
fix puts in place is better wrapped in a function implemented in the rte_alarm
library, rather than privately in the bonding library, along with the
replacement of all the pointer assignments with an internal state variable.  I'm
not asserting that we need to audit the code to fix up all other call sites
using the rte_alarm API right now (though a quick cscope search reveals the only
locations are in the test apps).  I'm just saying let's fix it in such a way that
other users can take advantage of it now, and write the unit tests for it after
it ships.

In fact, looking at the alarm test infrastructure, alarm re-arming stress isn't
currently tested at all, so that could be a large undertaking after shipment.
Neil

> Best regards
> Michal
> 
> 
> -----Original Message-----
> From: Neil Horman [mailto:nhorman@tuxdriver.com] 
> Sent: Monday, September 22, 2014 12:25 PM
> To: Wodkowski, PawelX
> Cc: dev@dpdk.org; Jastrzebski, MichalX K; Doherty, Declan
> Subject: Re: [dpdk-dev] [PATCH 2/2] bond: add mode 4 support
> 
> On Mon, Sep 22, 2014 at 06:26:21AM +0000, Wodkowski, PawelX wrote:
> > > I think that will work, but I believe you're making it more 
> > > complicated (and less reusable) than it needs to be.  What I think 
> > > you really need to do is create a new rte api call, 
> > > rte_eal_alarm_cancel_sync (something like the equivalent of 
> > > del_timer_sync in linux, that wraps up the
> > > while(rte_eal_alarm_cancel(...) == 0) {rte_pause} in its own 
> > > function (so other call sites can use it, as I don't think this is 
> > > an uncommon problem), Then just create a bonding-internal state flag 
> > > to signal the periodic callback that it shouldn't re-arm the timer.  
> > > That way all you have to do is set the flag, and call 
> > > rte_eal_alarm_cancel_sync, and you're done.  And other applications 
> > > will be able to handle this common type of operation as well
> > > 
> > > Neil
> > 
> > I agree with you that alarms should be upgraded but this need to 
> > discussed and tested. Now there is not time for that.
> > 
> Nak, thats a completely broken argument.  I've just demonstrated to you a race condition in the driver you are submitting.  Granted it stems from a design lmitation in the alarm subsystem, but its what we all have to work with, and we can work around it and make the driver safe.  To say there is "no time" to fix it, implies to me that you care more about just slamming your code in than making anything work properly.  What exactly is your rush that makes you think its more important for the code to be merged than fixing it to work correctly?
> Neil
> 
  

Patch

diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h
index 2e08f23..1a3711b 100644
--- a/lib/librte_ether/rte_ether.h
+++ b/lib/librte_ether/rte_ether.h
@@ -293,6 +293,7 @@  struct vlan_hdr {
 #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */
 #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
 #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */
+#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */
 
 #ifdef __cplusplus
 }
diff --git a/lib/librte_pmd_bond/Makefile b/lib/librte_pmd_bond/Makefile
index 953d75e..c2312c2 100644
--- a/lib/librte_pmd_bond/Makefile
+++ b/lib/librte_pmd_bond/Makefile
@@ -44,6 +44,7 @@  CFLAGS += $(WERROR_FLAGS)
 #
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c
 
 #
diff --git a/lib/librte_pmd_bond/rte_eth_bond.h b/lib/librte_pmd_bond/rte_eth_bond.h
index bd59780..6aac4ec 100644
--- a/lib/librte_pmd_bond/rte_eth_bond.h
+++ b/lib/librte_pmd_bond/rte_eth_bond.h
@@ -75,6 +75,10 @@  extern "C" {
 /**< Broadcast (Mode 3).
  * In this mode all transmitted packets will be transmitted on all available
  * active slaves of the bonded. */
+#define BONDING_MODE_8023AD				(4)
+/**< 802.3AD (Mode 4).
+ * In this mode transmission and reception of packets is managed by the LACP
+ * protocol specified in the 802.3ad standard. */
 
 /* Balance Mode Transmit Policies */
 #define BALANCE_XMIT_POLICY_LAYER2		(0)
diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
new file mode 100644
index 0000000..6ce6efb
--- /dev/null
+++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
@@ -0,0 +1,1064 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include <rte_alarm.h>
+#include <rte_malloc.h>
+#include <rte_errno.h>
+
+#include "rte_eth_bond_private.h"
+#include "rte_eth_bond_8023ad.h"
+
+#include <rte_cycles.h>
+
+#define RTE_LIBRTE_BOND_DEBUG_8023AX
+
+#ifdef RTE_LIBRTE_BOND_DEBUG_8023AX
+#define BOND_ASSERT(expr) \
+	((expr) ? (void) (0) \
+	: rte_panic("%s(%d): assertion failed" __STRING(expr), __FILE__, __LINE__))
+#else
+#define BOND_ASSERT(expr) do { } while (0)
+#endif
+
+#ifdef RTE_LIBRTE_BOND_DEBUG_8023AX
+#define _PORT_ID internals->active_slaves[port_num]
+#define BOND_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \
+			bond_dbg_get_time_diff(), _PORT_ID, __FUNCTION__, ##__VA_ARGS__)
+
+static unsigned
+bond_dbg_get_time_diff(void)
+{
+	static unsigned ms_start = 0;
+	struct timespec t;
+	uint64_t ms;
+
+	clock_gettime(CLOCK_MONOTONIC, &t);
+	ms = (((long)t.tv_sec * 1000000000L) + t.tv_nsec) / 1000000L;
+
+	if (ms_start == 0)
+		ms_start = ms;
+
+	return ms - ms_start;
+}
+
+static void
+bond_print_lacp(struct lacpdu *l)
+{
+	char a_state[256] = { 0 };
+	char p_state[256] = { 0 };
+	static const char *state_labels[] = {
+		"ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP"
+	};
+
+	int a_len = 0;
+	int p_len = 0;
+	uint8_t i;
+
+	for (i = 0; i < 8; i++) {
+		if ((l->actor.state >> i) & 1) {
+			a_len += snprintf(a_state + a_len, sizeof(a_state) - a_len, "%s ",
+				state_labels[i]);
+		}
+
+		if ((l->partner.state >> i) & 1) {
+			p_len += snprintf(p_state + p_len, sizeof(p_state) - p_len, "%s ",
+				state_labels[i]);
+		}
+	}
+
+	if (a_len && a_state[a_len-1] == ' ')
+		a_state[a_len-1] = '\0';
+
+	if (p_len && p_state[p_len-1] == ' ')
+			p_state[p_len-1] = '\0';
+
+	RTE_LOG(DEBUG, PMD, "LACP: {\n"\
+			"  subtype= %02X\n"\
+			"  ver_num=%02X\n"\
+			"  actor={ tlv=%02X, len=%02X\n"\
+			"    pri=%04X, system=(ADDRESS), key=%04X, p_pri=%04X p_num=%04X\n"\
+			"       state={ %s }\n"\
+			"  }\n"\
+			"  partner={ tlv=%02X, len=%02X\n"\
+			"    pri=%04X, system=(ADDRESS), key=%04X, p_pri=%04X p_num=%04X\n"\
+			"       state={ %s }\n"\
+			"  }\n"\
+			"  collector={info=%02X, length=%02X, max_delay=%04X\n, " \
+							"type_term=%02X, terminator_length = %02X}\n",\
+			l->subtype,\
+			l->version_number,\
+			l->actor.tlv_type_info,\
+			l->actor.info_length,\
+			l->actor.port_params.system_priority,\
+			l->actor.port_params.key,\
+			l->actor.port_params.port_priority,\
+			l->actor.port_params.port_number,\
+			a_state,\
+			l->partner.tlv_type_info,\
+			l->partner.info_length,\
+			l->partner.port_params.system_priority,\
+			l->partner.port_params.key,\
+			l->partner.port_params.port_priority,\
+			l->partner.port_params.port_number,\
+			p_state,\
+			l->tlv_type_collector_info,\
+			l->collector_info_length,\
+			l->collector_max_delay,\
+			l->tlv_type_terminator,\
+			l->terminator_length);
+
+}
+#define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu)
+
+#else
+#define BOND_PRINT_LACP(lacpdu) do { } while (0)
+#define BOND_DEBUG(fmt, ...) do { } while (0)
+#endif
+
+static const struct ether_addr lacp_mac_addr = {
+	.addr_bytes = {0x01, 0x80, 0xC2, 0x00, 0x00, 0x02}
+};
+
+static void
+timer_expired_cb(void *arg)
+{
+	enum timer_state *timer = arg;
+
+	BOND_ASSERT(*timer == TIMER_RUNNING);
+	*timer = TIMER_EXPIRED;
+}
+
+static void
+timer_cancel(enum timer_state *timer)
+{
+	rte_eal_alarm_cancel(&timer_expired_cb, timer);
+	*timer = TIMER_NOT_STARTED;
+}
+
+static void
+timer_set(enum timer_state *timer, uint64_t timeout)
+{
+	rte_eal_alarm_cancel(&timer_expired_cb, timer);
+	rte_eal_alarm_set(timeout * 1000, &timer_expired_cb, timer);
+	*timer = TIMER_RUNNING;
+}
+
+static bool
+timer_is_expired(enum timer_state *timer)
+{
+	return *timer == TIMER_EXPIRED;
+}
+
+static void
+record_default(struct port *port)
+{
+	/* Record default parameters for partner. Partner admin parameters
+	 * are not implemented so there is nothing to copy. Only mark in the actor
+	 * that the partner is in defaulted state. */
+	port->partner_state = STATE_LACP_ACTIVE;
+	ACTOR_STATE_SET(port, DEFAULTED);
+}
+
+/** Function handles rx state machine.
+ *
+ * This function implements Receive State Machine from point 5.4.12 in
+ * 802.1AX documentation. It should be called periodically.
+ *
+ * @param internals	Bonded device private data.
+ * @param port_num	Position of the port in the active slaves array.
+ * @param lacp		LACPDU received, or NULL if none was received.
+ */
+static void
+rx_machine(struct bond_dev_private *internals, uint8_t port_num,
+	struct lacpdu *lacp)
+{
+	struct port *port = &internals->mode4.port_list[port_num];
+
+	if (SM_FLAG(port, BEGIN)) {
+		/* Initialize stuff */
+		BOND_DEBUG("-> INITIALIZE\n");
+		SM_FLAG_CLR(port, MOVED);
+		port->selected = UNSELECTED;
+
+		record_default(port);
+
+		ACTOR_STATE_CLR(port, EXPIRED);
+		timer_cancel(&port->current_while_timer);
+
+		/* DISABLED: On initialization partner is out of sync */
+		PARTNER_STATE_CLR(port, SYNCHRONIZATION);
+
+		/* Handle LACP DISABLED state if LACP is not enabled on this port */
+		if (!SM_FLAG(port, LACP_ENABLED))
+			PARTNER_STATE_CLR(port, AGGREGATION);
+	}
+
+	if (!SM_FLAG(port, LACP_ENABLED)) {
+		/* Update parameters only if state changed */
+		if (port->current_while_timer) {
+			port->selected = UNSELECTED;
+			record_default(port);
+			PARTNER_STATE_CLR(port, AGGREGATION);
+			ACTOR_STATE_CLR(port, EXPIRED);
+			timer_cancel(&port->current_while_timer);
+		}
+		return;
+	}
+
+	if (lacp) {
+		BOND_DEBUG("LACP -> CURRENT\n");
+		BOND_PRINT_LACP(lacp);
+		/* Update selected flag. If partner parameters are defaulted assume they
+		 * match. If not defaulted, compare the LACP actor with the port's
+		 * partner params. */
+		if (!(port->actor_state & STATE_DEFAULTED) &&
+			(((port->partner_state ^ lacp->actor.state) & STATE_AGGREGATION) ||
+				memcmp(&port->partner, &lacp->actor.port_params,
+					sizeof(port->partner)) != 0)) {
+			BOND_DEBUG("selected <- UNSELECTED\n");
+			port->selected = UNSELECTED;
+		}
+
+		/* Record this PDU actor params as partner params */
+		memcpy(&port->partner, &lacp->actor.port_params,
+			sizeof(struct port_params));
+		port->partner_state = lacp->actor.state;
+		ACTOR_STATE_CLR(port, DEFAULTED);
+
+		/* Update NTT if the partner's information is outdated */
+		uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT |
+			STATE_SYNCHRONIZATION | STATE_AGGREGATION;
+
+		if (((port->actor_state ^ lacp->partner.state) & state_mask) ||
+				memcmp(&port->actor, &lacp->partner.port_params,
+					sizeof(struct port_params)) != 0) {
+			port->sm_flags |= SM_FLAGS_NTT;
+		}
+
+		/* If LACP partner params match this port actor params */
+		if (memcmp(&port->actor, &lacp->partner.port_params,
+			    sizeof(port->actor)) == 0 &&
+			(port->partner_state & STATE_AGGREGATION) == (port->actor_state
+			    & STATE_AGGREGATION))
+			PARTNER_STATE_SET(port, SYNCHRONIZATION);
+		else if (!(port->partner_state & STATE_AGGREGATION) &&
+			    (port->actor_state & STATE_AGGREGATION))
+			PARTNER_STATE_SET(port, SYNCHRONIZATION);
+		else
+			PARTNER_STATE_CLR(port, SYNCHRONIZATION);
+
+		if (port->actor_state & STATE_LACP_SHORT_TIMEOUT)
+			timer_set(&port->current_while_timer, BOND_8023AD_SHORT_TIMEOUT_MS);
+		else
+			timer_set(&port->current_while_timer, BOND_8023AD_LONG_TIMEOUT_MS);
+
+		ACTOR_STATE_CLR(port, EXPIRED);
+		return; /* No state change */
+	}
+
+	if (port->current_while_timer != TIMER_RUNNING) {
+		ACTOR_STATE_SET(port, EXPIRED);
+		PARTNER_STATE_CLR(port, SYNCHRONIZATION);
+		PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
+		timer_set(&port->current_while_timer, BOND_8023AD_SHORT_TIMEOUT_MS);
+	}
+}
+
+/**
+ * Function handles periodic tx state machine.
+ *
+ * Function implements Periodic Transmission state machine from point 5.4.13
+ * in 802.1AX documentation. It should be called periodically.
+ *
+ * @param port			Port to handle state machine.
+ */
+static void
+periodic_machine(struct bond_dev_private *internals, uint8_t port_num)
+{
+	struct port *port = &internals->mode4.port_list[port_num];
+	/* Calculate if either side is LACP active */
+	uint32_t timeout;
+	uint16_t sm_flags = port->sm_flags;
+	uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) ||
+		PARTNER_STATE(port, LACP_ACTIVE);
+
+	uint8_t is_partner_fast, was_partner_fast;
+	/* No periodic on BEGIN, LACP DISABLED, or when both sides are passive */
+	if ((sm_flags & SM_FLAGS_BEGIN) || !(sm_flags & SM_FLAGS_LACP_ENABLED) ||
+		    active == 0) {
+		timer_cancel(&port->periodic_timer);
+		port->tx_machine_timer = TIMER_EXPIRED;
+		sm_flags &= ~SM_FLAGS_PARTNER_SHORT_TIMEOUT;
+		port->sm_flags = sm_flags;
+
+		BOND_DEBUG("-> NO_PERIODIC ( %s%s%s)\n",
+			SM_FLAG(port, BEGIN) ? "begind " : "",
+			SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ",
+			active ? "LACP active " : "LACP pasive ");
+		return;
+	}
+
+	is_partner_fast = !!(port->partner_state & STATE_LACP_SHORT_TIMEOUT);
+	was_partner_fast = !!(port->sm_flags & SM_FLAGS_PARTNER_SHORT_TIMEOUT);
+
+	/* If the periodic timer is not started, transit from NO PERIODIC to
+	 * FAST/SLOW. Otherwise check if the timer expired or the partner's
+	 * settings changed. */
+	if (port->periodic_timer != TIMER_NOT_STARTED) {
+		if (timer_is_expired(&port->periodic_timer))
+			sm_flags |= SM_FLAGS_NTT;
+		else if (is_partner_fast != was_partner_fast) {
+			/* Partner's timeout was slow and now it is fast -> send LACP.
+			 * In the other case (was fast and now it is slow) just switch
+			 * the timeout to slow without forcing a LACP send (because the
+			 * standard says so). */
+			if (!is_partner_fast)
+				sm_flags |= SM_FLAGS_NTT;
+		} else
+			return; /* Nothing changed */
+	}
+
+	/* Handle state transition to FAST/SLOW LACP timeout */
+	if (is_partner_fast) {
+		timeout = BOND_8023AD_FAST_PERIODIC_MS;
+		sm_flags |= SM_FLAGS_PARTNER_SHORT_TIMEOUT;
+	} else {
+		timeout = BOND_8023AD_SLOW_PERIODIC_MS;
+		sm_flags &= ~SM_FLAGS_PARTNER_SHORT_TIMEOUT;
+	}
+
+	timer_set(&port->periodic_timer, timeout);
+	port->sm_flags = sm_flags;
+}
+
+/**
+ * Function handles mux state machine.
+ *
+ * Function implements Mux Machine from point 5.4.15 in 802.1AX documentation.
+ * It should be called periodically.
+ *
+ * @param port			Port to handle state machine.
+ */
+static int
+mux_machine(struct bond_dev_private *internals, uint8_t port_num)
+{
+	bool ntt = false;
+	struct port *port = &internals->mode4.port_list[port_num];
+
+	/* Save current state for later use */
+	const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
+		STATE_COLLECTING;
+
+	/* Enter DETACHED state on BEGIN condition or from any other state if
+	 * port was unselected */
+	if (SM_FLAG(port, BEGIN) ||
+			port->selected == UNSELECTED || (port->selected == STANDBY &&
+				(port->actor_state & state_mask) != 0)) {
+		/* detach mux from aggregator not used */
+		port->actor_state &= ~state_mask;
+		/* Set ntt to true on the BEGIN condition, or on a transition from any
+		 * other state, which is indicated by wait_while_timer having been started */
+		if (SM_FLAG(port, BEGIN) ||
+				port->wait_while_timer != TIMER_NOT_STARTED) {
+			SM_FLAG_SET(port, NTT);
+			BOND_DEBUG("-> DETACHED\n");
+		}
+		timer_cancel(&port->wait_while_timer);
+	}
+
+	if (port->wait_while_timer == TIMER_NOT_STARTED) {
+		if (port->selected == SELECTED || port->selected == STANDBY) {
+			timer_set(&port->wait_while_timer,
+				BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS);
+
+			BOND_DEBUG("DETACHED -> WAITING\n");
+		}
+		/* Waiting state entered */
+		return 0;
+	}
+
+	/* Transit next state if port is ready */
+	if (!timer_is_expired(&port->wait_while_timer))
+		return 0;
+
+	if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) &&
+		!PARTNER_STATE(port, SYNCHRONIZATION)) {
+		/* If in COLLECTING or DISTRIBUTING state and partner becomes out of
+		 * sync, transit to ATTACHED state. */
+		ACTOR_STATE_CLR(port, DISTRIBUTING);
+		ACTOR_STATE_CLR(port, COLLECTING);
+		/* Clear actor sync to trigger the transition to ATTACHED in the condition below */
+		ACTOR_STATE_CLR(port, SYNCHRONIZATION);
+		BOND_DEBUG("Out of sync -> ATTACHED\n");
+	} else if (!ACTOR_STATE(port, SYNCHRONIZATION)) {
+		/* attach mux to aggregator */
+		BOND_ASSERT((port->actor_state & (STATE_COLLECTING |
+			STATE_DISTRIBUTING)) == 0);
+		ACTOR_STATE_SET(port, SYNCHRONIZATION);
+		ntt = true;
+		BOND_DEBUG("ATTACHED Entered\n");
+	} else if (!ACTOR_STATE(port, COLLECTING)) {
+		/* Start collecting if in sync */
+		if (PARTNER_STATE(port, SYNCHRONIZATION)) {
+			BOND_DEBUG("ATTACHED -> COLLECTING\n");
+			ACTOR_STATE_SET(port, COLLECTING);
+		}
+	} else if (ACTOR_STATE(port, COLLECTING)) {
+		/* Check if partner is in COLLECTING state. If so this port can
+		 * distribute frames to it */
+		if (!ACTOR_STATE(port, DISTRIBUTING)) {
+			if (PARTNER_STATE(port, COLLECTING)) {
+				/* Enable  DISTRIBUTING if partner is collecting */
+				ACTOR_STATE_SET(port, DISTRIBUTING);
+				ntt = true;
+				BOND_DEBUG("COLLECTING -> DISTRIBUTING\n");
+			}
+		} else {
+			if (!PARTNER_STATE(port, COLLECTING)) {
+				/* Disable DISTRIBUTING (enter COLLECTING state) if partner
+				 * is not collecting */
+				ACTOR_STATE_CLR(port, DISTRIBUTING);
+				ntt = true;
+				BOND_DEBUG("DISTRIBUTING -> COLLECTING\n");
+			}
+		}
+	}
+
+	if (ntt != false)
+		SM_FLAG_SET(port, NTT);
+
+	return ntt;
+}
+
+
+/**
+ * Function handles transmit state machine.
+ *
+ * Function implements Transmit Machine from point 5.4.16 in 802.1AX
+ * documentation.
+ *
+ * @param port
+ */
+static void
+tx_machine(struct rte_eth_dev *bond_dev, uint8_t port_num)
+{
+	struct bond_dev_private *internals = bond_dev->data->dev_private;
+	struct port *port = &internals->mode4.port_list[port_num];
+	struct mode8023ad_data *data = &internals->mode4;
+
+	struct slow_protocol_msg *msg;
+	struct lacpdu_header *hdr;
+	struct lacpdu *lacpdu;
+
+	/* If the periodic timer is not running, the periodic machine is in
+	 * NO PERIODIC and, according to the 802.1AX standard, the tx machine
+	 * should not transmit any frames and should set ntt to false. */
+	if (port->periodic_timer == TIMER_NOT_STARTED)
+		SM_FLAG_CLR(port, NTT);
+
+	if (!SM_FLAG(port, NTT) || !timer_is_expired(&port->tx_machine_timer))
+		return;
+
+	/* If all conditions are met construct packet to send */
+	if (rte_ring_dequeue(data->free_ring, (void **)&msg) == -ENOBUFS) {
+		BOND_DEBUG("tx_machine: no free message in free_ring\n");
+		return;
+	}
+
+	msg->pkt = rte_pktmbuf_alloc(data->mbuf_pool);
+	if (msg->pkt == NULL) {
+		/* FIXME: temporary workaround for the case when packets are not freed by the driver */
+		struct bond_rx_queue *bd_rx_q;
+		uint8_t i;
+
+		for (i = 0; i < bond_dev->data->nb_rx_queues; i++) {
+			bd_rx_q = (struct bond_rx_queue *)bond_dev->data->rx_queues[i];
+			BOND_ASSERT(bd_rx_q != NULL && bd_rx_q->mb_pool != NULL);
+			/* Do not use SP or SC pools as this is unsafe. */
+			if (bd_rx_q->mb_pool->flags & (MEMPOOL_F_SC_GET | MEMPOOL_F_SP_PUT))
+				continue;
+
+			msg->pkt = rte_pktmbuf_alloc(bd_rx_q->mb_pool);
+			if (msg->pkt) {
+				RTE_LOG(WARNING, PMD, "Failed to allocate LACP packet from mode4 "
+				"pool. Packet was allocated from the pool of rx queue %u\n", i);
+				break;
+			}
+		}
+
+		if (msg->pkt == NULL) {
+			rte_ring_enqueue(data->free_ring, msg);
+			RTE_LOG(ERR, PMD, "Failed to allocate LACP packet from pool\n");
+			return;
+		}
+	}
+	msg->port_id = internals->active_slaves[port_num];
+	hdr = rte_pktmbuf_mtod(msg->pkt, struct lacpdu_header *);
+
+	msg->pkt->pkt.data_len = sizeof(*hdr);
+	msg->pkt->pkt.pkt_len = sizeof(*hdr);
+	/* Source and destination MAC */
+	ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr);
+	mac_address_get(bond_dev, &hdr->eth_hdr.s_addr);
+	hdr->eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_SLOW);
+
+	port = &data->port_list[port_num];
+	lacpdu = &hdr->lacpdu;
+	memset(lacpdu, 0, sizeof(*lacpdu));
+
+	/* Initialize LACP part */
+	lacpdu->subtype = SUBTYPE_LACP;
+	lacpdu->version_number = 1;
+
+	/* ACTOR */
+	lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION;
+	lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params);
+	memcpy(&hdr->lacpdu.actor.port_params, &port->actor,
+			sizeof(port->actor));
+	lacpdu->actor.state = port->actor_state;
+
+	/* PARTNER */
+	lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION;
+	lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params);
+	memcpy(&lacpdu->partner.port_params, &port->partner,
+			sizeof(struct port_params));
+	lacpdu->partner.state = port->partner_state;
+
+	/* Other fields */
+	lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION;
+	lacpdu->collector_info_length = 0x10;
+	lacpdu->collector_max_delay = 0;
+
+	lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION;
+	lacpdu->terminator_length = 0;
+
+	if (rte_ring_enqueue(data->tx_ring, msg) == -ENOBUFS) {
+		/* If the TX ring is full, drop the packet and free the message.
+		 * Retransmission will happen in the next function call. */
+		rte_pktmbuf_free(msg->pkt);
+		rte_ring_enqueue(data->free_ring, msg);
+
+		RTE_LOG(ERR, PMD, "Failed to enqueue LACP packet into tx ring");
+		return;
+	}
+
+	BOND_DEBUG("sending LACP frame\n");
+	BOND_PRINT_LACP(lacpdu);
+
+	SM_FLAG_CLR(port, NTT);
+	/* Add up to 10% random backoff time to better distribute slow packets
+	 * between tx bursts. */
+	timer_set(&port->tx_machine_timer, BOND_8023AD_TX_PERIOD_MS +
+		rand() % ((BOND_8023AD_TX_PERIOD_MS * 10) / 100));
+}
+
+/**
+ * Function assigns port to aggregator.
+ *
+ * @param internals		Pointer to bond_dev_private structure.
+ * @param port_num			Port to assign.
+ */
+static void
+selection_logic(struct bond_dev_private *internals, uint8_t port_num)
+{
+	struct mode8023ad_data *data = &internals->mode4;
+	struct port *agg, *port, *port_list;
+	uint8_t ports_count;
+	uint8_t i;
+
+	ports_count = internals->slave_count;
+	port_list = data->port_list;
+	port = &port_list[port_num];
+
+	/* Skip port if it is selected */
+	if (port->selected == SELECTED)
+		return;
+
+	/* Search for aggregator suitable for this port */
+	for (i = 0; i < ports_count; ++i) {
+		agg = &port_list[i];
+		/* Skip ports that are not aggregators */
+		if (agg->used_agregator_idx != i && i == port_num)
+			continue;
+
+		/* Actor's system ID is not checked since all slave devices have the same
+		 * ID (MAC address). */
+		if ((agg->actor.key == port->actor.key &&
+			agg->partner.system_priority == port->partner.system_priority &&
+			is_same_ether_addr(&agg->partner.system, &port->partner.system) == 1
+			&& (agg->partner.key == port->partner.key)) &&
+			is_zero_ether_addr(&port->partner.system) != 1 &&
+			(agg->actor.key &
+				rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) != 0) {
+
+			port->used_agregator_idx = i;
+			break;
+		}
+	}
+
+	/* By default, the port uses itself as the aggregator */
+	if (i == ports_count)
+		port->used_agregator_idx = port_num;
+
+	port->selected = SELECTED;
+
+	BOND_DEBUG("-> SELECTED: ID=%3u pos=%3u\n"
+		"\t%s ID=%3u pos=%3u\n",
+		internals->active_slaves[port_num], port_num,
+		port->used_agregator_idx == port_num ?
+			"agregator not found, using default" : "agregator found",
+		port->used_agregator_idx,
+		internals->active_slaves[port->used_agregator_idx]);
+}
+
+/**
+ * Helper function which updates the list of slaves currently in DISTRIBUTING state
+ */
+static void
+update_mux_slaves(struct bond_dev_private *internals)
+{
+	struct mode8023ad_data *data = &internals->mode4;
+	struct port *port;
+	uint8_t current[RTE_MAX_ETHPORTS];
+	uint8_t count = 0;
+	uint8_t i;
+
+	for (i = 0; i < internals->slave_count; i++) {
+		port = &data->port_list[i];
+		if (ACTOR_STATE(port, DISTRIBUTING))
+			current[count++] = i;
+	}
+
+	memcpy(data->distibuting_slaves, current, sizeof(current[0]) * count);
+	data->distibuting_slaves_count = count;
+}
+
+/* Function maps DPDK speed to bonding speed stored in key field */
+static uint16_t
+link_speed_key(uint16_t speed) {
+	uint16_t key_speed;
+
+	switch (speed) {
+	case ETH_LINK_SPEED_AUTONEG:
+		key_speed = 0x00;
+		break;
+	case ETH_LINK_SPEED_10:
+		key_speed = BOND_LINK_SPEED_KEY_10M;
+		break;
+	case ETH_LINK_SPEED_100:
+		key_speed = BOND_LINK_SPEED_KEY_100M;
+		break;
+	case ETH_LINK_SPEED_1000:
+		key_speed = BOND_LINK_SPEED_KEY_1000M;
+		break;
+	case ETH_LINK_SPEED_10G:
+		key_speed = BOND_LINK_SPEED_KEY_10G;
+		break;
+	case ETH_LINK_SPEED_20G:
+		key_speed = BOND_LINK_SPEED_KEY_20G;
+		break;
+	case ETH_LINK_SPEED_40G:
+		key_speed = BOND_LINK_SPEED_KEY_40G;
+		break;
+	default:
+		/* Unknown speed */
+		key_speed = 0xFFFF;
+	}
+
+	return key_speed;
+}
+
+static void
+bond_mode_8023ad_periodic_cb(void *arg)
+{
+	struct rte_eth_dev *bond_dev = arg;
+	struct bond_dev_private *internals = bond_dev->data->dev_private;
+	struct mode8023ad_data *data = &internals->mode4;
+
+	struct slow_protocol_frame *slow_hdr;
+	struct rte_eth_link link_info;
+
+	struct slow_protocol_msg *msgs[BOND_MODE_8023AX_RX_RING_SIZE];
+	uint16_t port_num, j, nb_msgs;
+	/* if not 0, the collecting/distributing array needs an update */
+	uint16_t slaves_changed = 0;
+	bool machines_invoked;
+
+	/* Update link status on each port */
+	for (port_num = 0; port_num < internals->active_slave_count; port_num++) {
+		uint8_t key;
+
+		rte_eth_link_get(internals->active_slaves[port_num], &link_info);
+		if (link_info.link_status != 0) {
+			key = link_speed_key(link_info.link_speed) << 1;
+			if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX)
+				key |= BOND_LINK_FULL_DUPLEX_KEY;
+		} else
+			key = 0;
+
+		data->port_list[port_num].actor.key = rte_cpu_to_be_16(key);
+	}
+
+	nb_msgs = (uint16_t)rte_ring_dequeue_burst(data->rx_ring, (void **) msgs,
+		BOND_MODE_8023AX_RX_RING_SIZE);
+
+	for (port_num = 0; port_num < internals->active_slave_count; port_num++) {
+		struct port *port = &data->port_list[port_num];
+		if ((port->actor.key &
+				rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) {
+
+			SM_FLAG_SET(port, BEGIN);
+
+			/* LACP is disabled on half duplex or when the link is down */
+			if (SM_FLAG(port, LACP_ENABLED)) {
+				/* If port was enabled set it to BEGIN state */
+				SM_FLAG_CLR(port, LACP_ENABLED);
+				ACTOR_STATE_CLR(port, DISTRIBUTING);
+				ACTOR_STATE_CLR(port, COLLECTING);
+				slaves_changed++;
+			}
+
+			BOND_DEBUG("Port %u is not LACP capable!\n",
+				internals->active_slaves[port_num]);
+			/* Skip this port processing */
+			continue;
+		}
+
+		SM_FLAG_SET(port, LACP_ENABLED);
+		machines_invoked = false;
+		/* Find LACP packet */
+		for (j = 0; j < nb_msgs; j++) {
+			if (msgs[j] == NULL || msgs[j]->port_id !=
+					internals->active_slaves[port_num])
+				continue;
+
+			slow_hdr = rte_pktmbuf_mtod(msgs[j]->pkt,
+					struct slow_protocol_frame *);
+
+			if (slow_hdr->slow_protocol.subtype == SLOW_SUBTYPE_LACP) {
+				/* This is LACP frame so pass it to rx_machine */
+				struct lacpdu *lacp = (struct lacpdu *)&slow_hdr->slow_protocol;
+				/* Invoke state machines on every active slave port */
+				rx_machine(internals, port_num, lacp);
+				periodic_machine(internals, port_num);
+				slaves_changed += mux_machine(internals, port_num);
+				tx_machine(bond_dev, port_num);
+				selection_logic(internals, port_num);
+
+				machines_invoked = true;
+			} else if (slow_hdr->slow_protocol.subtype == SLOW_SUBTYPE_MARKER) {
+				struct marker *marker;
+
+				marker = (struct marker *) &slow_hdr->slow_protocol;
+				if (marker->tlv_type_marker == MARKER_TLV_TYPE_MARKER_INFO) {
+					/* Reuse received packet to send frame to Marker Responder
+					 */
+					marker->tlv_type_marker = MARKER_TLV_TYPE_MARKER_RESP;
+
+					/* Update source MAC, destination MAC is multicast so we
+					 * don't update it */
+					mac_address_get(bond_dev, &slow_hdr->eth_hdr.s_addr);
+
+					if (rte_ring_enqueue(data->tx_ring, msgs[j]) == -ENOBUFS) {
+						RTE_LOG(ERR, PMD,
+						"Failed to enqueue packet into tx ring");
+						rte_pktmbuf_free(msgs[j]->pkt);
+						rte_ring_enqueue(data->free_ring, msgs[j]);
+					}
+
+					msgs[j] = NULL;
+				}
+			}
+		}
+
+		if (machines_invoked == false) {
+			rx_machine(internals, port_num, NULL);
+			periodic_machine(internals, port_num);
+			slaves_changed += mux_machine(internals, port_num);
+			tx_machine(bond_dev, port_num);
+			selection_logic(internals, port_num);
+			machines_invoked = true;
+		}
+
+		SM_FLAG_CLR(port, BEGIN);
+	}
+
+	/* Update mux if something changed */
+	if (slaves_changed > 0) {
+		update_mux_slaves(internals);
+		BOND_DEBUG("mux count %u [%2u%s%2u%s%2u%s%2u%s%s]\n",
+			data->distibuting_slaves_count,
+			data->distibuting_slaves[0],
+			data->distibuting_slaves_count > 0 ? " " : "\b\b",
+			data->distibuting_slaves[1],
+			data->distibuting_slaves_count > 1 ? " " : "\b\b",
+			data->distibuting_slaves[2],
+			data->distibuting_slaves_count > 2 ? " " : "\b\b",
+			data->distibuting_slaves[3],
+			data->distibuting_slaves_count > 3 ? " " : "\b\b",
+			data->distibuting_slaves_count > 4 ? "..." : "");
+	}
+
+	/* Free packets that were not reused */
+	for (port_num = 0; port_num < nb_msgs; port_num++) {
+		if (msgs[port_num] != NULL) {
+			rte_pktmbuf_free(msgs[port_num]->pkt);
+			rte_ring_enqueue(data->free_ring, msgs[port_num]);
+		}
+	}
+
+	rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000,
+			bond_mode_8023ad_periodic_cb, arg);
+}
+
+static void
+bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, uint8_t num)
+{
+	struct bond_dev_private *internals = bond_dev->data->dev_private;
+	struct mode8023ad_data *data = &internals->mode4;
+
+	struct port *port = &data->port_list[internals->active_slave_count];
+	struct port_params initial = {
+			.system = { { 0 } },
+			.system_priority = rte_cpu_to_be_16(0xFFFF),
+			.key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY),
+			.port_priority = rte_cpu_to_be_16(0x00FF),
+			.port_number = 0,
+	};
+
+	memcpy(&port->actor, &initial, sizeof(struct port_params));
+	mac_address_get(bond_dev, &port->actor.system);
+	port->actor.port_number =
+		slave_id_to_port_number(internals->active_slaves[num]);
+
+	memcpy(&port->partner, &initial, sizeof(struct port_params));
+
+	/* default states */
+	port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED;
+	port->partner_state = STATE_LACP_ACTIVE;
+	port->sm_flags = SM_FLAGS_BEGIN;
+
+	/* use this port as aggregator */
+	port->used_agregator_idx = num;
+}
+
+void
+bond_mode_8023ad_slave_append(struct rte_eth_dev *bond_dev)
+{
+	struct bond_dev_private *internals = bond_dev->data->dev_private;
+
+	bond_mode_8023ad_activate_slave(bond_dev, internals->active_slave_count);
+}
+
+int
+bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev,
+		uint8_t slave_pos)
+{
+	struct bond_dev_private *internals = bond_dev->data->dev_private;
+	struct mode8023ad_data *data = &internals->mode4;
+	struct port *port;
+	uint8_t i;
+
+	bond_mode_8023ad_stop(bond_dev);
+
+	/* Exclude slave from transmit policy. If this slave is an aggregator
+	 * make all aggregated slaves unselected to force the selection logic
+	 * to select a suitable aggregator for this port */
+	for (i = 0; i < internals->active_slave_count; i++) {
+		port = &data->port_list[slave_pos];
+		if (port->used_agregator_idx == slave_pos) {
+			port->selected = UNSELECTED;
+			port->actor_state &= ~(STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
+				STATE_COLLECTING);
+
+			/* Use default aggregator */
+			port->used_agregator_idx = i;
+		}
+	}
+
+	port = &data->port_list[slave_pos];
+	timer_cancel(&port->current_while_timer);
+	timer_cancel(&port->periodic_timer);
+	timer_cancel(&port->wait_while_timer);
+	timer_cancel(&port->tx_machine_timer);
+
+	update_mux_slaves(internals);
+
+	/* Remove slave port config */
+	if (slave_pos + 1 < internals->active_slave_count) {
+		memmove(&data->port_list[slave_pos],
+			&data->port_list[slave_pos + 1],
+			sizeof(data->port_list[0]) * internals->active_slave_count -
+				slave_pos - 1);
+	}
+
+	if (bond_dev->data->dev_started)
+		return bond_mode_8023ad_start(bond_dev);
+
+	return 0;
+}
+
+int
+bond_mode_8023ad_init(struct rte_eth_dev *bond_dev)
+{
+	struct bond_dev_private *internals = bond_dev->data->dev_private;
+	struct mode8023ad_data *data = &internals->mode4;
+	char mem_name[RTE_ETH_NAME_MAX_LEN];
+	int socket_id = bond_dev->pci_dev->numa_node;
+	uint8_t i;
+
+	if (data->mbuf_pool == NULL) {
+		const uint16_t element_size = sizeof(struct slow_protocol_frame) +
+			sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM;
+
+		snprintf(mem_name, sizeof(mem_name), "%s_POOL",	bond_dev->data->name);
+		data->mbuf_pool = rte_mempool_create(mem_name,
+			 /* FIXME: How big should the memory pool be? If the driver does not
+			  * free packets quickly enough there will be ENOMEM in tx_machine.
+			  * For now give 512 packets per slave and hope it will be enough. */
+			(BOND_MODE_8023AX_TX_RING_SIZE + 1) * (RTE_MAX_ETHPORTS * 512),
+			element_size,
+			RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ? 32 : RTE_MEMPOOL_CACHE_MAX_SIZE,
+			sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init,
+			NULL, rte_pktmbuf_init, NULL, socket_id, 0);
+
+		/* Any memory allocation failure in initialization is critical because
+		 * resources can't be freed, so reinitialization is impossible. */
+		if (data->mbuf_pool == NULL) {
+			RTE_LOG(ERR, PMD, "%s: Failed to initialize LACP mbuf pool\n",
+				bond_dev->data->name);
+
+			rte_panic("Failed to allocate memory pool ('%s')\n"
+				"for bond device '%s'\n", mem_name, bond_dev->data->name);
+		}
+
+		/* Setup ring for free messages that can be used in RX/TX burst */
+		snprintf(mem_name, sizeof(mem_name), "%s_free",
+			bond_dev->data->name);
+
+		uint16_t free_ring_size = BOND_MODE_8023AX_RX_RING_SIZE +
+			BOND_MODE_8023AX_TX_RING_SIZE;
+
+		data->free_ring = rte_ring_create(mem_name,
+			free_ring_size, socket_id, RING_F_SC_DEQ);
+
+		if (data->free_ring == NULL) {
+			rte_panic("%s: Failed to create slow messages free ring\n",
+				bond_dev->data->name);
+		}
+
+		for (i = 0; i < free_ring_size; i++) {
+			struct slow_protocol_msg *msg;
+
+			snprintf(mem_name, sizeof(mem_name), "%s_slow_msg_%u",
+				bond_dev->data->name, i);
+
+			msg = (struct slow_protocol_msg *) rte_malloc_socket(mem_name,
+				sizeof(struct slow_protocol_msg), 0, socket_id);
+
+			if (msg == NULL) {
+				rte_panic("%s: Failed to allocate slow message\n",
+				bond_dev->data->name);
+			}
+
+			rte_ring_enqueue(data->free_ring, msg);
+		}
+
+		/* Setup rings used by the rx/tx bursts and the state machines callback */
+		snprintf(mem_name, sizeof(mem_name), "%s_rx", bond_dev->data->name);
+		data->rx_ring = rte_ring_create(mem_name, BOND_MODE_8023AX_RX_RING_SIZE,
+			socket_id, RING_F_SC_DEQ);
+
+		if (data->rx_ring == NULL) {
+			rte_panic("%s: Failed to create slow messages rx ring\n",
+				bond_dev->data->name);
+		}
+
+		snprintf(mem_name, sizeof(mem_name), "%s_tx", bond_dev->data->name);
+		data->tx_ring = rte_ring_create(mem_name, BOND_MODE_8023AX_TX_RING_SIZE,
+			socket_id, RING_F_SP_ENQ);
+
+		if (data->tx_ring == NULL) {
+			rte_panic("%s: Failed to create slow messages tx ring\n",
+				bond_dev->data->name);
+		}
+	}
+
+	data->distibuting_slaves_count = 0;
+
+	for (i = 0; i < internals->active_slave_count; i++)
+		bond_mode_8023ad_activate_slave(bond_dev, i);
+
+	rte_eth_promiscuous_enable(bond_dev->data->port_id);
+	return 0;
+}
+
+int
+bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
+{
+	return rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000,
+		bond_mode_8023ad_periodic_cb, bond_dev);
+}
+
+int
+bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
+{
+	rte_eal_alarm_cancel(bond_mode_8023ad_periodic_cb, bond_dev);
+	return 0;
+}
+
+void
+bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
+	uint8_t slave_pos, struct rte_mbuf *slot_pkt)
+{
+	struct mode8023ad_data *data;
+	struct slow_protocol_msg *msg = NULL;
+
+	data = &internals->mode4;
+
+	if (unlikely(rte_ring_dequeue(data->free_ring, (void **)&msg) ==
+			-ENOBUFS)) {
+		rte_pktmbuf_free(slot_pkt);
+		return;
+	}
+
+	msg->pkt = slot_pkt;
+	msg->port_id = internals->active_slaves[slave_pos];
+
+	if (unlikely(rte_ring_enqueue(data->rx_ring, msg) == -ENOBUFS)) {
+		/* If the RX ring is full, free the message and drop the packet */
+		rte_ring_enqueue(data->free_ring, msg);
+		rte_pktmbuf_free(slot_pkt);
+	}
+}
diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.h b/lib/librte_pmd_bond/rte_eth_bond_8023ad.h
new file mode 100644
index 0000000..3c73f65
--- /dev/null
+++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.h
@@ -0,0 +1,411 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_ETH_BOND_8023AD_H_
+#define RTE_ETH_BOND_8023AD_H_
+
+#include <stdint.h>
+
+#include <rte_ether.h>
+#include <rte_byteorder.h>
+
+typedef int	bool;
+
+#define true	1
+#define false	0
+
+/**
+ * Constants (5.4.4 in 802.1AX documentation).
+ */
+#define BOND_8023AD_FAST_PERIODIC_MS               1000
+#define BOND_8023AD_SLOW_PERIODIC_MS              30000
+#define BOND_8023AD_SHORT_TIMEOUT_MS               3000
+#define BOND_8023AD_LONG_TIMEOUT_MS               90000
+#define BOND_8023AD_CHURN_DETECTION_TIMEOUT_MS    60000
+#define BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS      2000
+#define BOND_8023AD_TX_PERIOD_MS                    333
+/**
+ * Actor/partner states
+ */
+#define STATE_LACP_ACTIVE                   0x01
+#define STATE_LACP_SHORT_TIMEOUT            0x02
+#define STATE_AGGREGATION                   0x04
+#define STATE_SYNCHRONIZATION               0x08
+#define STATE_COLLECTING                    0x10
+#define STATE_DISTRIBUTING                  0x20
+/** Partners parameters are defaulted */
+#define STATE_DEFAULTED                     0x40
+#define STATE_EXPIRED                       0x80
+
+/**
+ * State machine flags
+ */
+#define SM_FLAGS_BEGIN                      0x0001
+#define SM_FLAGS_LACP_ENABLED               0x0002
+#define SM_FLAGS_ACTOR_CHURN                0x0004
+#define SM_FLAGS_PARTNER_CHURN              0x0008
+#define SM_FLAGS_MOVED                      0x0100
+#define SM_FLAGS_PARTNER_SHORT_TIMEOUT      0x0200
+#define SM_FLAGS_NTT                        0x0400
+
+#define BOND_MODE_8023AX_UPDATE_TIMEOUT_MS  100
+#define BOND_MODE_8023AX_RX_RING_SIZE       (2 * RTE_MAX_ETHPORTS)
+#define BOND_MODE_8023AX_TX_RING_SIZE       (2 * RTE_MAX_ETHPORTS)
+
+#define BOND_LINK_FULL_DUPLEX_KEY           0x01
+#define BOND_LINK_SPEED_KEY_10M             0x02
+#define BOND_LINK_SPEED_KEY_100M            0x04
+#define BOND_LINK_SPEED_KEY_1000M           0x08
+#define BOND_LINK_SPEED_KEY_10G             0x10
+#define BOND_LINK_SPEED_KEY_20G             0x11
+#define BOND_LINK_SPEED_KEY_40G             0x12
+
+#define SUBTYPE_LACP                        0x01
+
+#define TLV_TYPE_ACTOR_INFORMATION          0x01
+#define TLV_TYPE_PARTNER_INFORMATION        0x02
+#define TLV_TYPE_COLLECTOR_INFORMATION      0x03
+#define TLV_TYPE_TERMINATOR_INFORMATION     0x00
+
+#define CHECK_FLAGS(_variable, _flags) ((_variable) & (_flags))
+#define SET_FLAGS(_variable, _flags) ((_variable) |= (_flags))
+#define CLEAR_FLAGS(_variable, _flags) ((_variable) &= ~(_flags))
+
+#define SM_FLAG(port, flag) (!!CHECK_FLAGS((port)->sm_flags, SM_FLAGS_ ## flag))
+#define SM_FLAG_SET(port, flag) SET_FLAGS((port)->sm_flags, SM_FLAGS_ ## flag)
+#define SM_FLAG_CLR(port, flag) CLEAR_FLAGS((port)->sm_flags, SM_FLAGS_ ## flag)
+
+#define ACTOR_STATE(port, flag) (!!CHECK_FLAGS((port)->actor_state, STATE_ ## flag))
+#define ACTOR_STATE_SET(port, flag) SET_FLAGS((port)->actor_state, STATE_ ## flag)
+#define ACTOR_STATE_CLR(port, flag) CLEAR_FLAGS((port)->actor_state, STATE_ ## flag)
+
+#define PARTNER_STATE(port, flag) (!!CHECK_FLAGS((port)->partner_state, STATE_ ## flag))
+#define PARTNER_STATE_SET(port, flag) SET_FLAGS((port)->partner_state, STATE_ ## flag)
+#define PARTNER_STATE_CLR(port, flag) CLEAR_FLAGS((port)->partner_state, STATE_ ## flag)
+
+/** Slow protocol LACP frame subtype */
+#define SLOW_SUBTYPE_LACP					0x01
+
+/** Slow protocol marker frame subtype */
+#define SLOW_SUBTYPE_MARKER					0x02
+
+/** Marker type info request */
+#define MARKER_TLV_TYPE_MARKER_INFO			0x01
+
+/** Marker type info response */
+#define MARKER_TLV_TYPE_MARKER_RESP			0x02
+
+/** Generic slow protocol structure */
+struct slow_protocol {
+	uint8_t subtype;
+	uint8_t reserved_119[119];
+} __attribute__((__packed__));
+
+/** Generic slow protocol frame type structure */
+struct slow_protocol_frame {
+	struct ether_hdr eth_hdr;
+	struct slow_protocol slow_protocol;
+} __attribute__((__packed__));
+
+struct port_params {
+	uint16_t system_priority;
+	/**< System priority (unused in current implementation) */
+	struct ether_addr system;
+	/**< System ID - Slave MAC address, same as bonding MAC address */
+	uint16_t key;
+	/**< Speed information (implementation dependent) and duplex. */
+	uint16_t port_priority;
+	/**< Priority of this port (unused in current implementation) */
+	uint16_t port_number;
+	/**< Port number. It corresponds to slave port id. */
+} __attribute__((__packed__));
+
+struct lacpdu_actor_partner_params {
+	uint8_t tlv_type_info;
+	uint8_t info_length;
+	struct port_params port_params;
+	uint8_t state;
+	uint8_t reserved_3[3];
+} __attribute__((__packed__));
+
+/** LACPDU structure (5.4.2 in 802.1AX documentation). */
+struct lacpdu {
+	uint8_t subtype;
+	uint8_t version_number;
+
+	struct lacpdu_actor_partner_params actor;
+	struct lacpdu_actor_partner_params partner;
+
+	uint8_t tlv_type_collector_info;
+	uint8_t collector_info_length;
+	uint16_t collector_max_delay;
+	uint8_t reserved_12[12];
+
+	uint8_t tlv_type_terminator;
+	uint8_t terminator_length;
+	uint8_t reserved_50[50];
+} __attribute__((__packed__));
+
+/** LACPDU frame: Contains ethernet header and LACPDU. */
+struct lacpdu_header {
+	struct ether_hdr eth_hdr;
+	struct lacpdu lacpdu;
+} __attribute__((__packed__));
+
+struct marker {
+	uint8_t subtype;
+	uint8_t version_number;
+
+	uint8_t tlv_type_marker;
+	uint8_t info_length;
+	uint16_t requester_port;
+	struct ether_addr requester_system;
+	uint32_t requester_transaction_id;
+	uint8_t reserved_2[2];
+
+	uint8_t tlv_type_terminator;
+	uint8_t terminator_length;
+	uint8_t reserved_90[90];
+} __attribute__((__packed__));
+
+struct marker_header {
+	struct ether_hdr eth_hdr;
+	struct marker marker;
+} __attribute__((__packed__));
+
+/** Variables associated with the system (5.4.5 in 802.1AX documentation). */
+struct system {
+	struct ether_addr actor_system;
+	/**< The MAC address component of the System Identifier of the System */
+	uint16_t actor_system_priority;
+	/**< The System Priority of the System */
+};
+
+enum selection {
+	UNSELECTED,
+	STANDBY,
+	SELECTED
+};
+
+enum timer_state {
+	TIMER_NOT_STARTED,
+	TIMER_RUNNING,
+	TIMER_EXPIRED,
+};
+
+/** Variables associated with each port (5.4.7 in 802.1AX documentation). */
+struct port {
+	/**
+	 * The operational values of the Actor's state parameters. Bitmask
+	 * of port states.
+	 */
+	uint8_t actor_state;
+
+	/** The operational Actor's port parameters */
+	struct port_params actor;
+
+	/**
+	 * The operational value of the Actor's view of the current values of
+	 * the Partner's state parameters. The Actor sets this variable either
+	 * to the value received from the Partner in an LACPDU, or to the value
+	 * of Partner_Admin_Port_State. Bitmask of port states.
+	 */
+	uint8_t partner_state;
+
+	/** The operational Partner's port parameters */
+	struct port_params partner;
+
+	/* Additional port parameters not listed in documentation */
+	/** State machine flags */
+	uint16_t sm_flags;
+	enum selection selected;
+
+	enum timer_state current_while_timer;
+	enum timer_state periodic_timer;
+	enum timer_state wait_while_timer;
+	enum timer_state tx_machine_timer;
+
+	/* Aggregator parameters */
+	/**
+	 * Index in mode8023ad_data::port_list[] of Aggregator
+	 * the port is currently attached to.
+	 */
+	uint16_t used_agregator_idx;
+};
+
+
+/**
+ * Struct used to communicate with the 8023ad logic.
+ */
+struct slow_protocol_msg {
+	struct rte_mbuf *pkt;
+	uint8_t port_id;
+};
+
+/** Data specific to mode 802.1AX */
+struct mode8023ad_data {
+	/** Memory pool used to allocate slow protocol packets */
+	struct rte_mempool *mbuf_pool;
+
+	/** Ring containing free slow_protocol_msg objects. Used to avoid
+	 * allocating/freeing memory in RX/TX bursts */
+	struct rte_ring *free_ring;
+
+	/** Ring of struct slow_protocol_msg from RX burst function */
+	struct rte_ring *rx_ring;
+
+	/** Ring of struct slow_protocol_msg to the TX burst function */
+	struct rte_ring *tx_ring;
+
+	/** list of all enslaved ports in mode 802.1AX */
+	struct port port_list[RTE_MAX_ETHPORTS];
+
+	/** List of slave positions (not port ids) currently used for transmitting packets */
+	uint8_t distibuting_slaves[RTE_MAX_ETHPORTS];
+	uint8_t distibuting_slaves_count;
+};
+
+/* Forward declaration */
+struct bond_dev_private;
+
+/**
+ * Configures 802.1AX mode and all active slaves on bonded interface.
+ *
+ * @param dev Bonded interface
+ * @return
+ *  0 on success, negative value otherwise.
+ */
+int
+bond_mode_8023ad_init(struct rte_eth_dev *dev);
+
+/**
+ * Deconfigures 802.1AX mode of the bonded interface and slaves.
+ *
+ * @param dev Bonded interface
+ * @return
+ *   0 on success, negative value otherwise.
+ */
+int bond_mode_8023ad_disable(struct rte_eth_dev *dev);
+
+/**
+ * Starts 802.3AX state machines management logic.
+ * @param dev Bonded interface
+ * @return
+ *   0 on success, negative value otherwise.
+ */
+int
+bond_mode_8023ad_start(struct rte_eth_dev *dev);
+
+/**
+ * Stops 802.3AX state machines management logic.
+ * @param dev Bonded interface
+ * @return
+ *   0 on success, negative value otherwise.
+ */
+
+int
+bond_mode_8023ad_stop(struct rte_eth_dev *dev);
+
+/**
+ * Passes given slow packet to state machines management logic.
+ * @param internals Bonded device private data.
+ * @param slave_pos Position in the active slaves array on which this packet was received.
+ * @param slot_pkt Slow packet
+ */
+void
+bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
+	uint8_t slave_pos, struct rte_mbuf *slot_pkt);
+
+/**
+ * Appends and initializes slave active_slaves[active_slave_count] to use with
+ * 802.1AX mode.
+ *
+ * @pre active_slaves[active_slave_count] must contain valid slave id.
+ * @post active_slave_count must be incremented.
+ *
+ * @param dev       Bonded interface.
+ *
+ * @return
+ *  0 on success, negative value otherwise.
+ */
+void
+bond_mode_8023ad_slave_append(struct rte_eth_dev *dev);
+
+/**
+ * Deinitializes and removes the given slave from 802.1AX mode.
+ *
+ * @pre active_slaves[slave_num] must contain valid slave id corresponding to
+ * 		slave initialized in 802.1AX mode.
+ * @post active_slaves[slave_num] must be removed.
+ *
+ * @param dev       Bonded interface.
+ * @param slave_pos Position of slave in active_slaves array
+ *
+ * @return
+ *  0 on success, negative value otherwise.
+ *
+ */
+int
+bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *dev, uint8_t slave_pos);
+
+/**
+ * Converts port_number from network byte order to slave id.
+ *
+ * @param port_number The 8023ad port number to convert.
+ * @return corresponding slave id
+ */
+static inline uint8_t
+port_number_to_slave_id(uint16_t port_number)
+{
+	uint16_t port_id = rte_be_to_cpu_16(port_number);
+	/* The standard requires that the port number be greater than 0.
+	 * Subtract 1 to get the corresponding slave id */
+	return port_id - 1;
+}
+
+/**
+ * Converts slave id to mode 8023ad port number.
+ *
+ * @param slave_id Id of slave to convert.
+ * @return corresponding Port number in network byte order.
+ */
+static inline uint16_t
+slave_id_to_port_number(uint8_t slave_id)
+{
+	/* The standard requires that the port number be greater than 0.
+	 * Add 1 to get the corresponding port_number */
+	uint16_t port_number = (uint16_t)slave_id + 1;
+	return rte_cpu_to_be_16(port_number);
+}
+
+#endif /* RTE_ETH_BOND_8023AD_H_ */
diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c b/lib/librte_pmd_bond/rte_eth_bond_api.c
index 460df65..264b86e 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_api.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_api.c
@@ -107,24 +107,29 @@  valid_slave_port_id(uint8_t port_id)
 }
 
 void
-rte_eth_bond_activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id )
+rte_eth_bond_activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
 {
 	struct bond_dev_private *internals = eth_dev->data->dev_private;
 	uint8_t active_count = internals->active_slave_count;
 
 	internals->active_slaves[active_count] = port_id;
 
+	if (internals->mode == BONDING_MODE_8023AD)
+		bond_mode_8023ad_slave_append(eth_dev);
 
 	internals->active_slave_count = active_count + 1;
 }
 
 void
-rte_eth_bond_deactive_slave(struct rte_eth_dev *eth_dev,
-	uint8_t slave_pos )
+rte_eth_bond_deactivate_slave(struct rte_eth_dev *eth_dev,
+	uint8_t slave_pos)
 {
 	struct bond_dev_private *internals = eth_dev->data->dev_private;
 	uint8_t active_count = internals->active_slave_count;
 
+	if (internals->mode == BONDING_MODE_8023AD)
+		bond_mode_8023ad_deactivate_slave(eth_dev, slave_pos);
+
 	active_count--;
 
 	/* If slave was not at the end of the list
@@ -252,13 +257,7 @@  rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
 	eth_dev->dev_ops = &default_dev_ops;
 	eth_dev->pci_dev = pci_dev;
 
-	if (bond_ethdev_mode_set(eth_dev, mode)) {
-		RTE_LOG(ERR, PMD,
-				"%s: failed to set bonded device %d mode too %d\n",
-				__func__, eth_dev->data->port_id, mode);
-		goto err;
-	}
-
+	internals->mode = BONDING_MODE_INVALID;
 	internals->current_primary_port = 0;
 	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
 	internals->user_defined_mac = 0;
@@ -272,6 +271,13 @@  rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
 	memset(internals->presisted_slaves_conf, 0,
 			sizeof(internals->presisted_slaves_conf));
 
+	if (bond_ethdev_mode_set(eth_dev, mode)) {
+		RTE_LOG(ERR, PMD,
+				"%s: failed to set bonded device %d mode too %d\n",
+				__func__, eth_dev->data->port_id, mode);
+		goto err;
+	}
+
 	return eth_dev->data->port_id;
 
 err:
@@ -428,7 +434,7 @@  rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id)
 			slave_port_id);
 
 	if (pos < internals->active_slave_count)
-		rte_eth_bond_deactive_slave(eth_dev, pos);
+		rte_eth_bond_deactivate_slave(eth_dev, pos);
 
 	pos = -1;
 	/* now remove from slave list */
diff --git a/lib/librte_pmd_bond/rte_eth_bond_args.c b/lib/librte_pmd_bond/rte_eth_bond_args.c
index 11d9816..72458b3 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_args.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_args.c
@@ -170,6 +170,7 @@  bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused,
 	case BONDING_MODE_ACTIVE_BACKUP:
 	case BONDING_MODE_BALANCE:
 	case BONDING_MODE_BROADCAST:
+	case BONDING_MODE_8023AD:
 		return 0;
 	default:
 		return -1;
diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
index 482ddb8..9c99755 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
@@ -40,9 +40,11 @@ 
 #include <rte_devargs.h>
 #include <rte_kvargs.h>
 #include <rte_dev.h>
+#include <rte_log.h>
 
 #include "rte_eth_bond.h"
 #include "rte_eth_bond_private.h"
+#include "rte_eth_bond_8023ad.h"
 
 static uint16_t
 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
@@ -167,6 +169,46 @@  bond_ethdev_tx_burst_active_backup(void *queue,
 			bufs, nb_pkts);
 }
 
+static uint16_t
+bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
+		uint16_t nb_pkts)
+{
+	/* Cast to structure, containing bonded device's port id and queue id */
+	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
+	struct bond_dev_private *internals = bd_rx_q->dev_private;
+	struct mode8023ad_data *mode4 = &internals->mode4;
+
+	struct ether_hdr *hdr;
+	struct rte_mbuf *pkts[nb_pkts + 1]; /* one packet more for slow packet */
+
+	uint16_t num_rx_slave = 0;	/* Number of packets received on the current slave */
+	uint16_t num_rx_total = 0;	/* Total number of received packets */
+
+	uint8_t i, j;
+
+	for (i = 0; i < internals->active_slave_count && num_rx_total < nb_pkts; i++) {
+		/* Read packets from this slave */
+		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
+				bd_rx_q->queue_id, pkts, nb_pkts + 1 - num_rx_total);
+
+		/* Separate slow protocol packets from others */
+		for (j = 0; j < num_rx_slave; j++) {
+			hdr = rte_pktmbuf_mtod(pkts[j], struct ether_hdr *);
+
+			uint16_t ether_type = rte_be_to_cpu_16(hdr->ether_type);
+			if (likely(ether_type != ETHER_TYPE_SLOW)) {
+				if (likely(ACTOR_STATE(&mode4->port_list[i], COLLECTING)))
+					bufs[num_rx_total++] = pkts[j];
+				else
+					rte_pktmbuf_free(pkts[j]);
+			} else
+				bond_mode_8023ad_handle_slow_pkt(internals, i, pkts[j]);
+		}
+	}
+
+	return num_rx_total;
+}
+
 static inline uint16_t
 ether_hash(struct ether_hdr *eth_hdr)
 {
@@ -349,6 +391,125 @@  bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
 }
 
 static uint16_t
+bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct bond_dev_private *internals;
+	struct mode8023ad_data *mode4;
+	struct bond_tx_queue *bd_tx_q;
+
+	uint8_t num_of_slaves;
+	uint8_t slaves[RTE_MAX_ETHPORTS];
+	 /* positions in slaves, not IDs */
+	uint8_t distributing_slaves[RTE_MAX_ETHPORTS];
+	uint8_t distributing_slaves_count;
+
+	uint16_t num_tx_slave, num_tx_total = 0, tx_fail_total = 0;
+	uint16_t i, op_slave_id;
+
+	/* Slow packets from 802.3AX state machines. */
+	struct slow_protocol_msg *slow_msg;
+
+	/* Allocate one additional packet slot per slave for 8023AD mode.
+	 * The first element, if not NULL, is a slow packet. */
+	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts + 1];
+	/* Total amount of packets in slave_bufs */
+	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
+	/* Array of slow packets placed in each slave */
+	uint8_t slave_slow_packets[RTE_MAX_ETHPORTS] = { 0 };
+
+	bd_tx_q = (struct bond_tx_queue *)queue;
+	internals = bd_tx_q->dev_private;
+	mode4 = &internals->mode4;
+
+	/* Copy slave list to protect against slave up/down changes during tx
+	 * bursting */
+	num_of_slaves = internals->active_slave_count;
+	if (num_of_slaves < 1)
+		return num_tx_total;
+
+	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
+
+	distributing_slaves_count = mode4->distibuting_slaves_count;
+	memcpy(distributing_slaves, mode4->distibuting_slaves,
+			sizeof(slaves[0]) * distributing_slaves_count);
+
+	for (i = 0; i < num_of_slaves; i++)
+		slave_bufs[i][0] = NULL;
+
+	/* It is likely that tx ring will be empty. If it is not empty, it is
+	 * likely that there will be only one frame. */
+	while (unlikely(!rte_ring_empty(mode4->tx_ring)) &&
+			rte_ring_dequeue(mode4->tx_ring, (void **)&slow_msg) != -ENOENT) {
+		i = find_slave_by_id(slaves, num_of_slaves, slow_msg->port_id);
+
+		/* Assign slow packet to slave or drop it if the slave is not in the
+		 * active list (e.g. link down). */
+		if (likely(i < num_of_slaves)) {
+			 /* If there is more than one slow packet for the same slave, send
+			  * only the latest and drop the previous one - tx burst was not
+			  * called quickly enough. */
+			if (slave_bufs[i][0] != NULL)
+				rte_pktmbuf_free(slave_bufs[i][0]);
+
+			slave_bufs[i][0] = slow_msg->pkt;
+			slave_nb_pkts[i] = 1;
+			slave_slow_packets[i] = 1;
+		} else
+			rte_pktmbuf_free(slow_msg->pkt);
+
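+		/* Return the message container to the free ring for reuse */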
+		rte_ring_enqueue(mode4->free_ring, slow_msg);
+	}
+
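+	/* Hash data packets only across slaves that are currently distributing */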
+	if (likely(distributing_slaves_count > 0)) {
+		/* Populate each slave's mbuf array with the packets to be sent on it */
+		for (i = 0; i < nb_pkts; i++) {
+			/* Select output slave using hash based on xmit policy */
+			op_slave_id = xmit_slave_hash(bufs[i], distributing_slaves_count,
+					internals->balance_xmit_policy);
+
+			/* Populate slave mbuf arrays with mbufs for that slave. Use only slaves
+			 * that are currently distributing. */
+			uint8_t slave_pos = distributing_slaves[op_slave_id];
+			uint16_t pkt_pos = slave_nb_pkts[slave_pos];
+			slave_nb_pkts[slave_pos]++;
+
+			slave_bufs[slave_pos][pkt_pos] = bufs[i];
+		}
+	}
+
+	/* Send packet burst on each slave device */
+	for (i = 0; i < num_of_slaves; i++) {
+		if (slave_nb_pkts[i] > 0) {
+			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
+					slave_bufs[i], slave_nb_pkts[i]);
+
+			/* If tx burst fails, move the unsent packets to the end of bufs */
+			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
+				uint16_t slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
+				uint16_t fail_start = num_tx_slave;
+
+				/* Free the slow packet if it exists and was not sent. */
+				if (slave_slow_packets[i] != 0 && num_tx_slave == 0) {
+					rte_pktmbuf_free(slave_bufs[i][0]);
+					slave_tx_fail_count--;
+					fail_start = 1;
+				}
+
+				tx_fail_total += slave_tx_fail_count;
+				memcpy(&bufs[nb_pkts - tx_fail_total],
+					&slave_bufs[i][fail_start],
+					slave_tx_fail_count * sizeof(bufs[0]));
+			}
+
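+			/* Slow packets are consumed internally, so exclude them from the
+			 * count of transmitted data packets returned to the caller */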
+			if (num_tx_slave > 0)
+				num_tx_slave -= slave_slow_packets[i];
+
+			num_tx_total += num_tx_slave;
+		}
+	}
+
+	return num_tx_total;
+}
+
+static uint16_t
 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
 		uint16_t nb_pkts)
 {
@@ -505,6 +666,7 @@  mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
 	case BONDING_MODE_ROUND_ROBIN:
 	case BONDING_MODE_BALANCE:
 	case BONDING_MODE_BROADCAST:
+	case BONDING_MODE_8023AD:
 		for (i = 0; i < internals->slave_count; i++) {
 			if (mac_address_set(&rte_eth_devices[internals->slaves[i]],
 					bonded_eth_dev->data->mac_addrs)) {
@@ -568,6 +730,13 @@  bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
 		break;
+	case BONDING_MODE_8023AD:
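+		/* Initialize the per-device 802.3ad state before installing the
+		 * mode 4 burst functions */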
+		if (bond_mode_8023ad_init(eth_dev) != 0)
+			return -1;
+
+		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
+		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
+		break;
 	default:
 		return -1;
 	}
@@ -764,6 +933,9 @@  bond_ethdev_start(struct rte_eth_dev *eth_dev)
 	if (internals->user_defined_primary_port)
 		bond_ethdev_primary_set(internals, internals->primary_port);
 
+	if (internals->mode == BONDING_MODE_8023AD)
+		bond_mode_8023ad_start(eth_dev);
+
 	return 0;
 }
 
@@ -772,6 +944,9 @@  bond_ethdev_stop(struct rte_eth_dev *eth_dev)
 {
 	struct bond_dev_private *internals = eth_dev->data->dev_private;
 
+	if (internals->mode == BONDING_MODE_8023AD)
+		bond_mode_8023ad_stop(eth_dev);
+
 	internals->active_slave_count = 0;
 
 	eth_dev->data->dev_link.link_status = 0;
@@ -954,6 +1129,7 @@  bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
 	case BONDING_MODE_ROUND_ROBIN:
 	case BONDING_MODE_BALANCE:
 	case BONDING_MODE_BROADCAST:
+	case BONDING_MODE_8023AD:
 		for (i = 0; i < internals->slave_count; i++)
 			rte_eth_promiscuous_enable(internals->slaves[i]);
 		break;
@@ -977,6 +1153,7 @@  bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
 	case BONDING_MODE_ROUND_ROBIN:
 	case BONDING_MODE_BALANCE:
 	case BONDING_MODE_BROADCAST:
+	case BONDING_MODE_8023AD:
 		for (i = 0; i < internals->slave_count; i++)
 			rte_eth_promiscuous_disable(internals->slaves[i]);
 		break;
@@ -1057,7 +1234,7 @@  bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
 			return;
 
 		/* Remove from active slave list */
-		rte_eth_bond_deactive_slave(bonded_eth_dev, active_pos);
+		rte_eth_bond_deactivate_slave(bonded_eth_dev, active_pos);
 
 		/* No active slaves, change link status to down and reset other
 		 * link properties */
diff --git a/lib/librte_pmd_bond/rte_eth_bond_private.h b/lib/librte_pmd_bond/rte_eth_bond_private.h
index 6742a4c..a615205 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_private.h
+++ b/lib/librte_pmd_bond/rte_eth_bond_private.h
@@ -41,6 +41,7 @@  extern "C" {
 #include <rte_ethdev.h>
 
 #include "rte_eth_bond.h"
+#include "rte_eth_bond_8023ad.h"
 
 #define PMD_BOND_SLAVE_PORT_KVARG		("slave")
 #define PMD_BOND_PRIMARY_SLAVE_KVARG	("primary")
@@ -53,6 +54,8 @@  extern "C" {
 #define PMD_BOND_XMIT_POLICY_LAYER23_KVARG	("l23")
 #define PMD_BOND_XMIT_POLICY_LAYER34_KVARG	("l34")
 
+#define BONDING_MODE_INVALID 0xFF
+
 extern const char *pmd_bond_init_valid_arguments[];
 
 extern const char *driver_name;
@@ -123,6 +126,8 @@  struct bond_dev_private {
 
 	/** Persisted configuration of slaves */
 	struct slave_conf presisted_slaves_conf[RTE_MAX_ETHPORTS];
+
+	struct mode8023ad_data mode4;
 };
 
 extern struct eth_dev_ops default_dev_ops;
@@ -130,6 +135,8 @@  extern struct eth_dev_ops default_dev_ops;
 int
 valid_bonded_ethdev(struct rte_eth_dev *eth_dev);
 
+/* Search the given slave array for the position of the given id.
+ * Return the slave position, or slaves_count if not found. */
 static inline uint8_t
 find_slave_by_id(uint8_t *slaves, uint8_t slaves_count,
 	uint8_t slave_id ) {
@@ -153,7 +160,7 @@  int
 valid_slave_port_id(uint8_t port_id);
 
 void
-rte_eth_bond_deactive_slave(struct rte_eth_dev *eth_dev,
+rte_eth_bond_deactivate_slave(struct rte_eth_dev *eth_dev,
 	uint8_t slave_pos );
 
 void