net/ixgbe: fix blocking system events

Message ID 1577328342-216505-1-git-send-email-taox.zhu@intel.com (mailing list archive)
State Superseded, archived
Delegated to: xiaolong ye
Headers
Series net/ixgbe: fix blocking system events |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-testing success Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-nxp-Performance success Performance Testing PASS
ci/travis-robot success Travis build: passed
ci/Intel-compilation success Compilation OK

Commit Message

Zhu, TaoX Dec. 26, 2019, 2:45 a.m. UTC
  From: Zhu Tao <taox.zhu@intel.com>

IXGBE link status task use rte alarm thread in old implementation.
Sometime ixgbe link status task takes up to 9 seconds. This will
severely affect the rte-alarm-thread-dependent a task in the system,
like  interrupt or hotplug event. So replace with a independent thread
which has the same thread affinity settings as rte interrupt.

Fixes: 0408f47b ("net/ixgbe: fix busy polling while fiber link update")
Cc: stable@dpdk.org

Signed-off-by: Zhu Tao <taox.zhu@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethdev.c | 184 +++++++++++++++++++++++++++++++++++++--
 drivers/net/ixgbe/ixgbe_ethdev.h |  32 +++++++
 2 files changed, 210 insertions(+), 6 deletions(-)
  

Comments

Ananyev, Konstantin Dec. 30, 2019, 1:58 p.m. UTC | #1
Hi, 

> IXGBE link status task use rte alarm thread in old implementation.
> Sometime ixgbe link status task takes up to 9 seconds. This will
> severely affect the rte-alarm-thread-dependent a task in the system,
> like  interrupt or hotplug event. So replace with a independent thread
> which has the same thread affinity settings as rte interrupt.


I don't think it is a good idea to add into PMD code that creates/manages
new threads.
I think ideal would be to rework link setup code, so it can take less time
(make it interruptable/repeatable).
If that is not possible, and control function is the only way,
there is a wrapper function: rte_ctrl_thread_create()
that can be used here instead of creating your own framework.
 

> 
> Fixes: 0408f47b ("net/ixgbe: fix busy polling while fiber link update")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Zhu Tao <taox.zhu@intel.com>
> ---
>  drivers/net/ixgbe/ixgbe_ethdev.c | 184 +++++++++++++++++++++++++++++++++++++--
>  drivers/net/ixgbe/ixgbe_ethdev.h |  32 +++++++
>  2 files changed, 210 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
> index 2c6fd0f..f0b387d 100644
> --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> @@ -15,6 +15,7 @@
>  #include <rte_byteorder.h>
>  #include <rte_common.h>
>  #include <rte_cycles.h>
> +#include <rte_tailq.h>
> 
>  #include <rte_interrupts.h>
>  #include <rte_log.h>
> @@ -378,6 +379,9 @@ static int ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
>  					 struct rte_eth_udp_tunnel *udp_tunnel);
>  static int ixgbe_filter_restore(struct rte_eth_dev *dev);
>  static void ixgbe_l2_tunnel_conf(struct rte_eth_dev *dev);
> +static int ixgbe_task_thread_init(struct rte_eth_dev *dev);
> +static void ixgbe_task_thread_uninit(struct rte_eth_dev *dev);
> +
> 
>  /*
>   * Define VF Stats MACRO for Non "cleared on read" register
> @@ -1069,6 +1073,171 @@ struct rte_ixgbe_xstats_name_off {
>  }
> 
>  /*
> + * Add a task to task queue tail.
> + */
> +int ixgbe_add_task(struct rte_eth_dev *dev, ixgbe_task_cb_fn task_cb)
> +{
> +	struct ixgbe_adapter *ad = dev->data->dev_private;
> +	struct ixgbe_task *task;
> +
> +	if (ad->task_status == IXGBE_TASK_THREAD_RUNNING) {
> +		task = rte_zmalloc("ixgbe", sizeof(struct ixgbe_task), 0);
> +		if (task == NULL)
> +			return -ENOMEM;
> +
> +		task->arg = dev;
> +		task->task_cb = task_cb;
> +		task->status = IXGBE_TASK_READY;
> +
> +		pthread_mutex_lock(&ad->task_lock);
> +		TAILQ_INSERT_TAIL(&ad->task_head, task, next);
> +		pthread_cond_signal(&ad->task_cond);
> +		pthread_mutex_unlock(&ad->task_lock);
> +	} else {
> +		return -EPERM;	/* Operation not permitted */
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Sync cancel a task with all @task_cb be exit.
> + */
> +int ixgbe_cancel_task(struct rte_eth_dev *dev, ixgbe_task_cb_fn task_cb)
> +{
> +	struct ixgbe_adapter *ad = dev->data->dev_private;
> +	struct ixgbe_task *task, *ttask;
> +	int i, executing;
> +#define DELAY_TIMEOUT_LOG   2000	// 2s
> +#define DELAY_TIMEOUT_MAX   10000	// 10s
> +
> +	for (i = 0; i < DELAY_TIMEOUT_MAX; i++) {
> +		executing = 0;
> +		if (ad->task_status == IXGBE_TASK_THREAD_RUNNING) {
> +			pthread_mutex_lock(&ad->task_lock);
> +			TAILQ_FOREACH_SAFE(task, &ad->task_head, next, ttask) {
> +				if (task->task_cb == task_cb) {
> +					if (task->status == IXGBE_TASK_RUNNING) {
> +						executing++;
> +					} else {
> +						TAILQ_REMOVE(&ad->task_head, task, next);
> +						rte_free(task);
> +					}
> +				}
> +			}
> +			pthread_mutex_unlock(&ad->task_lock);
> +
> +			if (executing) {
> +				if (i > DELAY_TIMEOUT_LOG && (i % 1000 == 0)) {
> +					PMD_DRV_LOG(WARNING,
> +						    "Cannel task time wait %ds!", i / 1000);
> +				}
> +
> +				rte_delay_us_sleep(1000);	// 1ms
> +				continue;
> +			}
> +		}
> +		break;
> +	}
> +
> +	if (i == DELAY_TIMEOUT_MAX)
> +		return -EBUSY;
> +
> +	return 0;
> +}
> +
> +/*
> + * Task main thread. Loop until state is set to IXGBE_TASK_THREAD_EXIT.
> + * For each task, set the status to IXGBE_TASK_RUNNING before execution,
> + * execute and then be dequeue.
> + */
> +static void *ixgbe_task_handler(void *args)
> +{
> +	struct ixgbe_adapter *ad =
> +	    ((struct rte_eth_dev *)args)->data->dev_private;
> +	struct ixgbe_task *task;
> +
> +	PMD_INIT_LOG(DEBUG, "ixgbe task thread created");
> +	while (ad->task_status) {
> +		pthread_mutex_lock(&ad->task_lock);
> +		if (TAILQ_EMPTY(&ad->task_head)) {
> +			pthread_cond_wait(&ad->task_cond, &ad->task_lock);
> +			pthread_mutex_unlock(&ad->task_lock);
> +			continue;
> +		}
> +
> +		/* pop firt task and run it */
> +		task = TAILQ_FIRST(&ad->task_head);
> +		task->status = IXGBE_TASK_RUNNING;
> +		pthread_mutex_unlock(&ad->task_lock);
> +
> +		if (task && task->task_cb)
> +			task->task_cb(task->arg);
> +
> +		pthread_mutex_lock(&ad->task_lock);
> +		TAILQ_REMOVE(&ad->task_head, task, next);
> +		rte_free(task);
> +		pthread_mutex_unlock(&ad->task_lock);
> +	}
> +
> +	pthread_mutex_lock(&ad->task_lock);
> +	while (!TAILQ_EMPTY(&ad->task_head)) {
> +		task = TAILQ_FIRST(&ad->task_head);
> +		TAILQ_REMOVE(&ad->task_head, task, next);
> +		rte_free(task);
> +	}
> +	pthread_mutex_unlock(&ad->task_lock);
> +	ad->task_status = IXGBE_TASK_THREAD_EXIT_DONE;
> +
> +	return NULL;
> +}
> +
> +static int ixgbe_task_thread_init(struct rte_eth_dev *dev)
> +{
> +	struct ixgbe_adapter *ad = dev->data->dev_private;
> +	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
> +	char name[IXGBE_TASK_THREAD_NAME_LEN];
> +	int ret;
> +
> +	TAILQ_INIT(&ad->task_head);
> +	pthread_mutex_init(&ad->task_lock, NULL);
> +	pthread_cond_init(&ad->task_cond, NULL);
> +
> +	snprintf(name, IXGBE_TASK_THREAD_NAME_LEN, "ixgbe-delay:%s",
> +		 pci_dev->name);
> +
> +	ret = rte_ctrl_thread_create(&ad->task_tid, name, NULL,
> +				     ixgbe_task_handler, dev);
> +	if (ret < 0) {
> +		PMD_INIT_LOG(ERR, "Create control thread %s fail!", name);
> +		return ret;
> +	}
> +	ad->task_status = IXGBE_TASK_THREAD_RUNNING;
> +
> +	return 0;
> +}
> +
> +static void ixgbe_task_thread_uninit(struct rte_eth_dev *dev)
> +{
> +	struct ixgbe_adapter *ad = dev->data->dev_private;
> +	int i;
> +#define MAX_EXIT_TIMEOUT  3000	/* 3s */
> +
> +	if (ad->task_status == IXGBE_TASK_THREAD_RUNNING) {
> +		ad->task_status = IXGBE_TASK_THREAD_EXIT;
> +		pthread_cond_signal(&ad->task_cond);
> +		for (i = 0; i < MAX_EXIT_TIMEOUT; i++) {
> +			if (ad->task_status == IXGBE_TASK_THREAD_EXIT_DONE) {
> +				pthread_cond_destroy(&ad->task_cond);
> +				pthread_mutex_destroy(&ad->task_lock);
> +				break;
> +			}
> +			rte_delay_us_sleep(1000);	// 1ms
> +		}
> +	}
> +}
> +
> +/*
>   * This function is based on code in ixgbe_attach() in base/ixgbe.c.
>   * It returns 0 on success.
>   */
> @@ -1301,6 +1470,7 @@ struct rte_ixgbe_xstats_name_off {
>  	/* enable support intr */
>  	ixgbe_enable_intr(eth_dev);
> 
> +	ixgbe_task_thread_init(eth_dev);
>  	ixgbe_dev_set_link_down(eth_dev);
> 
>  	/* initialize filter info */
> @@ -1337,6 +1507,7 @@ struct rte_ixgbe_xstats_name_off {
>  		return 0;
> 
>  	ixgbe_dev_close(eth_dev);
> +	ixgbe_task_thread_uninit(eth_dev);
> 
>  	return 0;
>  }
> @@ -1713,6 +1884,7 @@ static int ixgbe_l2_tn_filter_init(struct rte_eth_dev *eth_dev)
>  				   ixgbevf_dev_interrupt_handler, eth_dev);
>  	rte_intr_enable(intr_handle);
>  	ixgbevf_intr_enable(eth_dev);
> +	ixgbe_task_thread_init(eth_dev);
> 
>  	PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x mac.type=%s",
>  		     eth_dev->data->port_id, pci_dev->id.vendor_id,
> @@ -1732,6 +1904,7 @@ static int ixgbe_l2_tn_filter_init(struct rte_eth_dev *eth_dev)
>  		return 0;
> 
>  	ixgbevf_dev_close(eth_dev);
> +	ixgbe_task_thread_uninit(eth_dev);
> 
>  	return 0;
>  }
> @@ -2570,7 +2743,7 @@ static int eth_ixgbevf_pci_remove(struct rte_pci_device *pci_dev)
>  	}
> 
>  	/* Stop the link setup handler before resetting the HW. */
> -	rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev);
> +	ixgbe_cancel_task(dev, ixgbe_dev_setup_link_alarm_handler);
> 
>  	/* disable uio/vfio intr/eventfd mapping */
>  	rte_intr_disable(intr_handle);
> @@ -2842,7 +3015,7 @@ static int eth_ixgbevf_pci_remove(struct rte_pci_device *pci_dev)
> 
>  	PMD_INIT_FUNC_TRACE();
> 
> -	rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev);
> +	ixgbe_cancel_task(dev, ixgbe_dev_setup_link_alarm_handler);
> 
>  	/* disable interrupts */
>  	ixgbe_disable_intr(hw);
> @@ -4162,8 +4335,7 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
>  	if (link_up == 0) {
>  		if (ixgbe_get_media_type(hw) == ixgbe_media_type_fiber) {
>  			intr->flags |= IXGBE_FLAG_NEED_LINK_CONFIG;
> -			rte_eal_alarm_set(10,
> -				ixgbe_dev_setup_link_alarm_handler, dev);
> +			ixgbe_add_task(dev, ixgbe_dev_setup_link_alarm_handler);
>  		}
>  		return rte_eth_linkstatus_set(dev, &link);
>  	}
> @@ -5207,7 +5379,7 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
>  	PMD_INIT_FUNC_TRACE();
> 
>  	/* Stop the link setup handler before resetting the HW. */
> -	rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev);
> +	ixgbe_cancel_task(dev, ixgbe_dev_setup_link_alarm_handler);
> 
>  	err = hw->mac.ops.reset_hw(hw);
>  	if (err) {
> @@ -5305,7 +5477,7 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
> 
>  	PMD_INIT_FUNC_TRACE();
> 
> -	rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev);
> +	ixgbe_cancel_task(dev, ixgbe_dev_setup_link_alarm_handler);
> 
>  	ixgbevf_intr_disable(dev);
> 
> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
> index 76a1b9d..4426349 100644
> --- a/drivers/net/ixgbe/ixgbe_ethdev.h
> +++ b/drivers/net/ixgbe/ixgbe_ethdev.h
> @@ -470,6 +470,28 @@ struct ixgbe_tm_conf {
>  	bool committed;
>  };
> 
> +#define IXGBE_TASK_THREAD_NAME_LEN  64
> +enum {
> +	IXGBE_TASK_THREAD_EXIT = 0,
> +	IXGBE_TASK_THREAD_RUNNING = 1,
> +	IXGBE_TASK_THREAD_EXIT_DONE = 2
> +};
> +enum {
> +	IXGBE_TASK_READY = 0,
> +	IXGBE_TASK_RUNNING = 1,
> +	IXGBE_TASK_FINISH = 2
> +};
> +
> +typedef void (*ixgbe_task_cb_fn) (void *arg);
> +struct ixgbe_task {
> +	TAILQ_ENTRY(ixgbe_task) next;
> +	void *arg;
> +	int status;
> +	ixgbe_task_cb_fn task_cb;
> +};
> +TAILQ_HEAD(ixgbe_task_head, ixgbe_task);
> +
> +
>  /*
>   * Structure to store private data for each driver instance (for each port).
>   */
> @@ -510,6 +532,13 @@ struct ixgbe_adapter {
>  	 * mailbox status) link status.
>  	 */
>  	uint8_t pflink_fullchk;
> +
> +	/* Control thread per VF/PF, used for to do delay work */
> +	pthread_t task_tid;
> +	struct ixgbe_task_head task_head;
> +	int task_status;
> +	pthread_mutex_t task_lock;
> +	pthread_cond_t task_cond;
>  };
> 
>  struct ixgbe_vf_representor {
> @@ -760,6 +789,9 @@ void ixgbe_dev_macsec_setting_save(struct rte_eth_dev *dev,
>  		struct ixgbe_macsec_setting *macsec_setting);
> 
>  void ixgbe_dev_macsec_setting_reset(struct rte_eth_dev *dev);
> +int ixgbe_add_task(struct rte_eth_dev *dev, ixgbe_task_cb_fn task_cb);
> +int ixgbe_cancel_task(struct rte_eth_dev *dev, ixgbe_task_cb_fn task_cb);
> +
> 
>  static inline int
>  ixgbe_ethertype_filter_lookup(struct ixgbe_filter_info *filter_info,
> --
> 1.8.3.1
  
Zhu, TaoX Jan. 2, 2020, 9:58 a.m. UTC | #2
Hi Konstantin,

Thank you for your advice. Your advice is excellent. I have the same idea with you to solve this problem.
First.  Consider whether the fallback function of alamer can shorten the time. 
But it seems difficult for the following reasons(If you have a good idea, please let me know):
	a. The entry of the fallback function is 'ixgbe_setup_mac_link_multispeed_fiber', It's a little more complicated
	b. This callback function not only has multiple sleeps, but also may have long sleep in other functions it calls
	c. The function called by the callback function has different implementations for different NIC
	d. This callback function and other functions it calls are all from ND's shared code. After modification, upgrading will cause trouble

So I use a separate thread to replace alarm, This thread is created using wrapper function: rte_ctrl_thread_create().
A mechanism is provided to create a thread when PF or VF is initialized for the first time. This thread will select tasks from the task list in order to execute. If the task list is empty, the thread sleeps. 
Provides interface for adding and deleting tasks.
The specific implementation is as follows(See patch for detailed implementation):
	a. Thread create function: ixgbe_task_thread_init, it will call rte_ctrl_thread_create to create thread
	b. Thread destroy function: ixgbe_task_thread_uninit
	c. Add task function: ixgbe_add_task, it add a task and wake up the sleeping thread
	d. Cancel task function: ixgbe_cancel_task, it cancel a task synchronously

The reason why a task queue is used to manage tasks is that there may be concurrent conflicts, because alarm has no concurrent conflicts.

BR,
Zhu, Tao

> -----Original Message-----
> From: Ananyev, Konstantin
> Sent: Monday, December 30, 2019 9:58 PM
> To: Zhu, TaoX <taox.zhu@intel.com>; konstantin.ananyev@intel.com
> wenzhuo.lu@intel.com
> Cc: dev@dpdk.org; stable@dpdk.org
> Subject: RE: [PATCH] net/ixgbe: fix blocking system events
> 
> Hi,
> 
> > IXGBE link status task use rte alarm thread in old implementation.
> > Sometime ixgbe link status task takes up to 9 seconds. This will
> > severely affect the rte-alarm-thread-dependent a task in the system,
> > like  interrupt or hotplug event. So replace with a independent thread
> > which has the same thread affinity settings as rte interrupt.
> 
> 
> I don't think it is a good idea to add into PMD code that creates/manages
> new threads.
> I think ideal would be to rework link setup code, so it can take less time
> (make it interruptable/repeatable).
> If that is not possible, and control function is the only way, there is a wrapper
> function: rte_ctrl_thread_create() that can be used here instead of creating
> your own framework.
> 
> 
> >
> > Fixes: 0408f47b ("net/ixgbe: fix busy polling while fiber link
> > update")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Zhu Tao <taox.zhu@intel.com>
> > ---
> >  drivers/net/ixgbe/ixgbe_ethdev.c | 184
> > +++++++++++++++++++++++++++++++++++++--
> >  drivers/net/ixgbe/ixgbe_ethdev.h |  32 +++++++
> >  2 files changed, 210 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c
> > b/drivers/net/ixgbe/ixgbe_ethdev.c
> > index 2c6fd0f..f0b387d 100644
> > --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> > +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> > @@ -15,6 +15,7 @@
> >  #include <rte_byteorder.h>
> >  #include <rte_common.h>
> >  #include <rte_cycles.h>
> > +#include <rte_tailq.h>
> >
> >  #include <rte_interrupts.h>
> >  #include <rte_log.h>
> > @@ -378,6 +379,9 @@ static int ixgbe_dev_udp_tunnel_port_del(struct
> rte_eth_dev *dev,
> >   struct rte_eth_udp_tunnel *udp_tunnel);  static int
> > ixgbe_filter_restore(struct rte_eth_dev *dev);  static void
> > ixgbe_l2_tunnel_conf(struct rte_eth_dev *dev);
> > +static int ixgbe_task_thread_init(struct rte_eth_dev *dev); static
> > +void ixgbe_task_thread_uninit(struct rte_eth_dev *dev);
> > +
> >
> >  /*
> >   * Define VF Stats MACRO for Non "cleared on read" register @@
> > -1069,6 +1073,171 @@ struct rte_ixgbe_xstats_name_off {  }
> >
> >  /*
> > + * Add a task to task queue tail.
> > + */
> > +int ixgbe_add_task(struct rte_eth_dev *dev, ixgbe_task_cb_fn task_cb)
> > +{ struct ixgbe_adapter *ad = dev->data->dev_private; struct
> > +ixgbe_task *task;
> > +
> > +if (ad->task_status == IXGBE_TASK_THREAD_RUNNING) { task =
> > +rte_zmalloc("ixgbe", sizeof(struct ixgbe_task), 0); if (task == NULL)
> > +return -ENOMEM;
> > +
> > +task->arg = dev;
> > +task->task_cb = task_cb;
> > +task->status = IXGBE_TASK_READY;
> > +
> > +pthread_mutex_lock(&ad->task_lock);
> > +TAILQ_INSERT_TAIL(&ad->task_head, task, next);
> > +pthread_cond_signal(&ad->task_cond);
> > +pthread_mutex_unlock(&ad->task_lock);
> > +} else {
> > +return -EPERM;/* Operation not permitted */ }
> > +
> > +return 0;
> > +}
> > +
> > +/*
> > + * Sync cancel a task with all @task_cb be exit.
> > + */
> > +int ixgbe_cancel_task(struct rte_eth_dev *dev, ixgbe_task_cb_fn
> > +task_cb) { struct ixgbe_adapter *ad = dev->data->dev_private; struct
> > +ixgbe_task *task, *ttask; int i, executing;
> > +#define DELAY_TIMEOUT_LOG   2000// 2s
> > +#define DELAY_TIMEOUT_MAX   10000// 10s
> > +
> > +for (i = 0; i < DELAY_TIMEOUT_MAX; i++) { executing = 0; if
> > +(ad->task_status == IXGBE_TASK_THREAD_RUNNING) {
> > +pthread_mutex_lock(&ad->task_lock);
> > +TAILQ_FOREACH_SAFE(task, &ad->task_head, next, ttask) { if
> > +(task->task_cb == task_cb) { if (task->status == IXGBE_TASK_RUNNING)
> > +{
> > +executing++;
> > +} else {
> > +TAILQ_REMOVE(&ad->task_head, task, next); rte_free(task); } } }
> > +pthread_mutex_unlock(&ad->task_lock);
> > +
> > +if (executing) {
> > +if (i > DELAY_TIMEOUT_LOG && (i % 1000 == 0))
> { PMD_DRV_LOG(WARNING,
> > +    "Cannel task time wait %ds!", i / 1000); }
> > +
> > +rte_delay_us_sleep(1000);// 1ms
> > +continue;
> > +}
> > +}
> > +break;
> > +}
> > +
> > +if (i == DELAY_TIMEOUT_MAX)
> > +return -EBUSY;
> > +
> > +return 0;
> > +}
> > +
> > +/*
> > + * Task main thread. Loop until state is set to IXGBE_TASK_THREAD_EXIT.
> > + * For each task, set the status to IXGBE_TASK_RUNNING before
> > +execution,
> > + * execute and then be dequeue.
> > + */
> > +static void *ixgbe_task_handler(void *args) { struct ixgbe_adapter
> > +*ad =
> > +    ((struct rte_eth_dev *)args)->data->dev_private; struct
> > +ixgbe_task *task;
> > +
> > +PMD_INIT_LOG(DEBUG, "ixgbe task thread created"); while
> > +(ad->task_status) { pthread_mutex_lock(&ad->task_lock);
> > +if (TAILQ_EMPTY(&ad->task_head)) {
> > +pthread_cond_wait(&ad->task_cond, &ad->task_lock);
> > +pthread_mutex_unlock(&ad->task_lock);
> > +continue;
> > +}
> > +
> > +/* pop firt task and run it */
> > +task = TAILQ_FIRST(&ad->task_head);
> > +task->status = IXGBE_TASK_RUNNING;
> > +pthread_mutex_unlock(&ad->task_lock);
> > +
> > +if (task && task->task_cb)
> > +task->task_cb(task->arg);
> > +
> > +pthread_mutex_lock(&ad->task_lock);
> > +TAILQ_REMOVE(&ad->task_head, task, next); rte_free(task);
> > +pthread_mutex_unlock(&ad->task_lock);
> > +}
> > +
> > +pthread_mutex_lock(&ad->task_lock);
> > +while (!TAILQ_EMPTY(&ad->task_head)) { task =
> > +TAILQ_FIRST(&ad->task_head); TAILQ_REMOVE(&ad->task_head, task,
> > +next); rte_free(task); } pthread_mutex_unlock(&ad->task_lock);
> > +ad->task_status = IXGBE_TASK_THREAD_EXIT_DONE;
> > +
> > +return NULL;
> > +}
> > +
> > +static int ixgbe_task_thread_init(struct rte_eth_dev *dev) { struct
> > +ixgbe_adapter *ad = dev->data->dev_private; struct rte_pci_device
> > +*pci_dev = RTE_ETH_DEV_TO_PCI(dev); char
> > +name[IXGBE_TASK_THREAD_NAME_LEN];
> > +int ret;
> > +
> > +TAILQ_INIT(&ad->task_head);
> > +pthread_mutex_init(&ad->task_lock, NULL);
> > +pthread_cond_init(&ad->task_cond, NULL);
> > +
> > +snprintf(name, IXGBE_TASK_THREAD_NAME_LEN, "ixgbe-delay:%s",
> > +pci_dev->name);
> > +
> > +ret = rte_ctrl_thread_create(&ad->task_tid, name, NULL,
> > +     ixgbe_task_handler, dev);
> > +if (ret < 0) {
> > +PMD_INIT_LOG(ERR, "Create control thread %s fail!", name); return
> > +ret; }
> > +ad->task_status = IXGBE_TASK_THREAD_RUNNING;
> > +
> > +return 0;
> > +}
> > +
> > +static void ixgbe_task_thread_uninit(struct rte_eth_dev *dev) {
> > +struct ixgbe_adapter *ad = dev->data->dev_private; int i; #define
> > +MAX_EXIT_TIMEOUT  3000/* 3s */
> > +
> > +if (ad->task_status == IXGBE_TASK_THREAD_RUNNING) {
> > +ad->task_status = IXGBE_TASK_THREAD_EXIT;
> > +pthread_cond_signal(&ad->task_cond);
> > +for (i = 0; i < MAX_EXIT_TIMEOUT; i++) { if (ad->task_status ==
> > +IXGBE_TASK_THREAD_EXIT_DONE) { pthread_cond_destroy(&ad-
> >task_cond);
> > +pthread_mutex_destroy(&ad->task_lock);
> > +break;
> > +}
> > +rte_delay_us_sleep(1000);// 1ms
> > +}
> > +}
> > +}
> > +
> > +/*
> >   * This function is based on code in ixgbe_attach() in base/ixgbe.c.
> >   * It returns 0 on success.
> >   */
> > @@ -1301,6 +1470,7 @@ struct rte_ixgbe_xstats_name_off {
> >  /* enable support intr */
> >  ixgbe_enable_intr(eth_dev);
> >
> > +ixgbe_task_thread_init(eth_dev);
> >  ixgbe_dev_set_link_down(eth_dev);
> >
> >  /* initialize filter info */
> > @@ -1337,6 +1507,7 @@ struct rte_ixgbe_xstats_name_off {  return 0;
> >
> >  ixgbe_dev_close(eth_dev);
> > +ixgbe_task_thread_uninit(eth_dev);
> >
> >  return 0;
> >  }
> > @@ -1713,6 +1884,7 @@ static int ixgbe_l2_tn_filter_init(struct
> rte_eth_dev *eth_dev)
> >     ixgbevf_dev_interrupt_handler, eth_dev);
> > rte_intr_enable(intr_handle);  ixgbevf_intr_enable(eth_dev);
> > +ixgbe_task_thread_init(eth_dev);
> >
> >  PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x
> mac.type=%s",
> >       eth_dev->data->port_id, pci_dev->id.vendor_id, @@ -1732,6
> > +1904,7 @@ static int ixgbe_l2_tn_filter_init(struct rte_eth_dev
> > *eth_dev)  return 0;
> >
> >  ixgbevf_dev_close(eth_dev);
> > +ixgbe_task_thread_uninit(eth_dev);
> >
> >  return 0;
> >  }
> > @@ -2570,7 +2743,7 @@ static int eth_ixgbevf_pci_remove(struct
> > rte_pci_device *pci_dev)  }
> >
> >  /* Stop the link setup handler before resetting the HW. */
> > -rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev);
> > +ixgbe_cancel_task(dev, ixgbe_dev_setup_link_alarm_handler);
> >
> >  /* disable uio/vfio intr/eventfd mapping */
> > rte_intr_disable(intr_handle); @@ -2842,7 +3015,7 @@ static int
> > eth_ixgbevf_pci_remove(struct rte_pci_device *pci_dev)
> >
> >  PMD_INIT_FUNC_TRACE();
> >
> > -rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev);
> > +ixgbe_cancel_task(dev, ixgbe_dev_setup_link_alarm_handler);
> >
> >  /* disable interrupts */
> >  ixgbe_disable_intr(hw);
> > @@ -4162,8 +4335,7 @@ static int
> > ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
> if
> > (link_up == 0) {  if (ixgbe_get_media_type(hw) ==
> > ixgbe_media_type_fiber) {  intr->flags |=
> IXGBE_FLAG_NEED_LINK_CONFIG;
> > -rte_eal_alarm_set(10, -ixgbe_dev_setup_link_alarm_handler, dev);
> > +ixgbe_add_task(dev, ixgbe_dev_setup_link_alarm_handler);
> >  }
> >  return rte_eth_linkstatus_set(dev, &link);  } @@ -5207,7 +5379,7 @@
> > static int ixgbevf_dev_xstats_get_names(__rte_unused struct
> > rte_eth_dev *dev,  PMD_INIT_FUNC_TRACE();
> >
> >  /* Stop the link setup handler before resetting the HW. */
> > -rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev);
> > +ixgbe_cancel_task(dev, ixgbe_dev_setup_link_alarm_handler);
> >
> >  err = hw->mac.ops.reset_hw(hw);
> >  if (err) {
> > @@ -5305,7 +5477,7 @@ static int
> > ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
> >
> >  PMD_INIT_FUNC_TRACE();
> >
> > -rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev);
> > +ixgbe_cancel_task(dev, ixgbe_dev_setup_link_alarm_handler);
> >
> >  ixgbevf_intr_disable(dev);
> >
> > diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h
> > b/drivers/net/ixgbe/ixgbe_ethdev.h
> > index 76a1b9d..4426349 100644
> > --- a/drivers/net/ixgbe/ixgbe_ethdev.h
> > +++ b/drivers/net/ixgbe/ixgbe_ethdev.h
> > @@ -470,6 +470,28 @@ struct ixgbe_tm_conf {  bool committed;  };
> >
> > +#define IXGBE_TASK_THREAD_NAME_LEN  64 enum
> { IXGBE_TASK_THREAD_EXIT
> > += 0, IXGBE_TASK_THREAD_RUNNING = 1,
> IXGBE_TASK_THREAD_EXIT_DONE = 2
> > +}; enum { IXGBE_TASK_READY = 0, IXGBE_TASK_RUNNING = 1,
> > +IXGBE_TASK_FINISH = 2 };
> > +
> > +typedef void (*ixgbe_task_cb_fn) (void *arg); struct ixgbe_task {
> > +TAILQ_ENTRY(ixgbe_task) next;
> > +void *arg;
> > +int status;
> > +ixgbe_task_cb_fn task_cb;
> > +};
> > +TAILQ_HEAD(ixgbe_task_head, ixgbe_task);
> > +
> > +
> >  /*
> >   * Structure to store private data for each driver instance (for each port).
> >   */
> > @@ -510,6 +532,13 @@ struct ixgbe_adapter {
> >   * mailbox status) link status.
> >   */
> >  uint8_t pflink_fullchk;
> > +
> > +/* Control thread per VF/PF, used for to do delay work */ pthread_t
> > +task_tid; struct ixgbe_task_head task_head; int task_status;
> > +pthread_mutex_t task_lock; pthread_cond_t task_cond;
> >  };
> >
> >  struct ixgbe_vf_representor {
> > @@ -760,6 +789,9 @@ void ixgbe_dev_macsec_setting_save(struct
> > rte_eth_dev *dev,  struct ixgbe_macsec_setting *macsec_setting);
> >
> >  void ixgbe_dev_macsec_setting_reset(struct rte_eth_dev *dev);
> > +int ixgbe_add_task(struct rte_eth_dev *dev, ixgbe_task_cb_fn
> > +task_cb); int ixgbe_cancel_task(struct rte_eth_dev *dev,
> > +ixgbe_task_cb_fn task_cb);
> > +
> >
> >  static inline int
> >  ixgbe_ethertype_filter_lookup(struct ixgbe_filter_info *filter_info,
> > --
> > 1.8.3.1
>
  
Ananyev, Konstantin Jan. 7, 2020, 5:34 p.m. UTC | #3
Hi Tao,

> Thank you for your advice. Your advice is excellent. I have the same idea with you to solve this problem.
> First.  Consider whether the fallback function of alamer can shorten the time.
> But it seems difficult for the following reasons(If you have a good idea, please let me know):
> 	a. The entry of the fallback function is 'ixgbe_setup_mac_link_multispeed_fiber', It's a little more complicated
> 	b. This callback function not only has multiple sleeps, but also may have long sleep in other functions it calls
> 	c. The function called by the callback function has different implementations for different NIC
> 	d. This callback function and other functions it calls are all from ND's shared code. After modification, upgrading will cause
> trouble

Agree, I also don't see a way to overcome that problem
without reworking ixgbe/base code quite significantly.
Which obviously is not an option. 

> 
> So I use a separate thread to replace alarm, This thread is created using wrapper function: rte_ctrl_thread_create().
> A mechanism is provided to create a thread when PF or VF is initialized for the first time. This thread will select tasks from the task list
> in order to execute. If the task list is empty, the thread sleeps.
> Provides interface for adding and deleting tasks.
> The specific implementation is as follows(See patch for detailed implementation):
> 	a. Thread create function: ixgbe_task_thread_init, it will call rte_ctrl_thread_create to create thread
> 	b. Thread destroy function: ixgbe_task_thread_uninit
> 	c. Add task function: ixgbe_add_task, it add a task and wake up the sleeping thread
> 	d. Cancel task function: ixgbe_cancel_task, it cancel a task synchronously
> 
> The reason why a task queue is used to manage tasks is that there may be concurrent conflicts, because alarm has no concurrent
> conflicts.

Sorry, I probably wasn't clear in my previous mail.
I realized what you did, but I think it is not a good practice
to add task management thread/code into specific PMD.
If we do need such code, then it probably has to be generic enough and  be in EAL.
Though I wonder do we really need it for that case?
Can't we just spawn a thread to execute ixgbe_dev_setup_link_alarm_handler?
I.E just :
rte_ctrl_thread_create(&hw->link_thread, ...., ixgbe_dev_setup_link_alarm_handler, dev);
instead of rte_eal_alarm_set()
And pthread_cancel(&hw->link_thread); pthread_join(hw->link_thread);
Instead of rte_alarm_cancel().

> 
> BR,
> Zhu, Tao
> 
> > -----Original Message-----
> > From: Ananyev, Konstantin
> > Sent: Monday, December 30, 2019 9:58 PM
> > To: Zhu, TaoX <taox.zhu@intel.com>; konstantin.ananyev@intel.com
> > wenzhuo.lu@intel.com
> > Cc: dev@dpdk.org; stable@dpdk.org
> > Subject: RE: [PATCH] net/ixgbe: fix blocking system events
> >
> > Hi,
> >
> > > IXGBE link status task use rte alarm thread in old implementation.
> > > Sometime ixgbe link status task takes up to 9 seconds. This will
> > > severely affect the rte-alarm-thread-dependent a task in the system,
> > > like  interrupt or hotplug event. So replace with a independent thread
> > > which has the same thread affinity settings as rte interrupt.
> >
> >
> > I don't think it is a good idea to add into PMD code that creates/manages
> > new threads.
> > I think ideal would be to rework link setup code, so it can take less time
> > (make it interruptable/repeatable).
> > If that is not possible, and control function is the only way, there is a wrapper
> > function: rte_ctrl_thread_create() that can be used here instead of creating
> > your own framework.
> >
> >
> > >
> > > Fixes: 0408f47b ("net/ixgbe: fix busy polling while fiber link
> > > update")
> > > Cc: stable@dpdk.org
> > >
> > > Signed-off-by: Zhu Tao <taox.zhu@intel.com>
> > > ---
> > >  drivers/net/ixgbe/ixgbe_ethdev.c | 184
> > > +++++++++++++++++++++++++++++++++++++--
> > >  drivers/net/ixgbe/ixgbe_ethdev.h |  32 +++++++
> > >  2 files changed, 210 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c
> > > b/drivers/net/ixgbe/ixgbe_ethdev.c
> > > index 2c6fd0f..f0b387d 100644
> > > --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> > > +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> > > @@ -15,6 +15,7 @@
> > >  #include <rte_byteorder.h>
> > >  #include <rte_common.h>
> > >  #include <rte_cycles.h>
> > > +#include <rte_tailq.h>
> > >
> > >  #include <rte_interrupts.h>
> > >  #include <rte_log.h>
> > > @@ -378,6 +379,9 @@ static int ixgbe_dev_udp_tunnel_port_del(struct
> > rte_eth_dev *dev,
> > >   struct rte_eth_udp_tunnel *udp_tunnel);  static int
> > > ixgbe_filter_restore(struct rte_eth_dev *dev);  static void
> > > ixgbe_l2_tunnel_conf(struct rte_eth_dev *dev);
> > > +static int ixgbe_task_thread_init(struct rte_eth_dev *dev); static
> > > +void ixgbe_task_thread_uninit(struct rte_eth_dev *dev);
> > > +
> > >
> > >  /*
> > >   * Define VF Stats MACRO for Non "cleared on read" register @@
> > > -1069,6 +1073,171 @@ struct rte_ixgbe_xstats_name_off {  }
> > >
> > >  /*
> > > + * Add a task to task queue tail.
> > > + */
> > > +int ixgbe_add_task(struct rte_eth_dev *dev, ixgbe_task_cb_fn task_cb)
> > > +{ struct ixgbe_adapter *ad = dev->data->dev_private; struct
> > > +ixgbe_task *task;
> > > +
> > > +if (ad->task_status == IXGBE_TASK_THREAD_RUNNING) { task =
> > > +rte_zmalloc("ixgbe", sizeof(struct ixgbe_task), 0); if (task == NULL)
> > > +return -ENOMEM;
> > > +
> > > +task->arg = dev;
> > > +task->task_cb = task_cb;
> > > +task->status = IXGBE_TASK_READY;
> > > +
> > > +pthread_mutex_lock(&ad->task_lock);
> > > +TAILQ_INSERT_TAIL(&ad->task_head, task, next);
> > > +pthread_cond_signal(&ad->task_cond);
> > > +pthread_mutex_unlock(&ad->task_lock);
> > > +} else {
> > > +return -EPERM;/* Operation not permitted */ }
> > > +
> > > +return 0;
> > > +}
> > > +
> > > +/*
> > > + * Sync cancel a task with all @task_cb be exit.
> > > + */
> > > +int ixgbe_cancel_task(struct rte_eth_dev *dev, ixgbe_task_cb_fn
> > > +task_cb) { struct ixgbe_adapter *ad = dev->data->dev_private; struct
> > > +ixgbe_task *task, *ttask; int i, executing;
> > > +#define DELAY_TIMEOUT_LOG   2000// 2s
> > > +#define DELAY_TIMEOUT_MAX   10000// 10s
> > > +
> > > +for (i = 0; i < DELAY_TIMEOUT_MAX; i++) { executing = 0; if
> > > +(ad->task_status == IXGBE_TASK_THREAD_RUNNING) {
> > > +pthread_mutex_lock(&ad->task_lock);
> > > +TAILQ_FOREACH_SAFE(task, &ad->task_head, next, ttask) { if
> > > +(task->task_cb == task_cb) { if (task->status == IXGBE_TASK_RUNNING)
> > > +{
> > > +executing++;
> > > +} else {
> > > +TAILQ_REMOVE(&ad->task_head, task, next); rte_free(task); } } }
> > > +pthread_mutex_unlock(&ad->task_lock);
> > > +
> > > +if (executing) {
> > > +if (i > DELAY_TIMEOUT_LOG && (i % 1000 == 0))
> > { PMD_DRV_LOG(WARNING,
> > > +    "Cannel task time wait %ds!", i / 1000); }
> > > +
> > > +rte_delay_us_sleep(1000);// 1ms
> > > +continue;
> > > +}
> > > +}
> > > +break;
> > > +}
> > > +
> > > +if (i == DELAY_TIMEOUT_MAX)
> > > +return -EBUSY;
> > > +
> > > +return 0;
> > > +}
> > > +
> > > +/*
> > > + * Task main thread. Loop until state is set to IXGBE_TASK_THREAD_EXIT.
> > > + * For each task, set the status to IXGBE_TASK_RUNNING before
> > > +execution,
> > > + * execute and then be dequeue.
> > > + */
> > > +static void *ixgbe_task_handler(void *args) { struct ixgbe_adapter
> > > +*ad =
> > > +    ((struct rte_eth_dev *)args)->data->dev_private; struct
> > > +ixgbe_task *task;
> > > +
> > > +PMD_INIT_LOG(DEBUG, "ixgbe task thread created"); while
> > > +(ad->task_status) { pthread_mutex_lock(&ad->task_lock);
> > > +if (TAILQ_EMPTY(&ad->task_head)) {
> > > +pthread_cond_wait(&ad->task_cond, &ad->task_lock);
> > > +pthread_mutex_unlock(&ad->task_lock);
> > > +continue;
> > > +}
> > > +
> > > +/* pop firt task and run it */
> > > +task = TAILQ_FIRST(&ad->task_head);
> > > +task->status = IXGBE_TASK_RUNNING;
> > > +pthread_mutex_unlock(&ad->task_lock);
> > > +
> > > +if (task && task->task_cb)
> > > +task->task_cb(task->arg);
> > > +
> > > +pthread_mutex_lock(&ad->task_lock);
> > > +TAILQ_REMOVE(&ad->task_head, task, next); rte_free(task);
> > > +pthread_mutex_unlock(&ad->task_lock);
> > > +}
> > > +
> > > +pthread_mutex_lock(&ad->task_lock);
> > > +while (!TAILQ_EMPTY(&ad->task_head)) { task =
> > > +TAILQ_FIRST(&ad->task_head); TAILQ_REMOVE(&ad->task_head, task,
> > > +next); rte_free(task); } pthread_mutex_unlock(&ad->task_lock);
> > > +ad->task_status = IXGBE_TASK_THREAD_EXIT_DONE;
> > > +
> > > +return NULL;
> > > +}
> > > +
> > > +static int ixgbe_task_thread_init(struct rte_eth_dev *dev) { struct
> > > +ixgbe_adapter *ad = dev->data->dev_private; struct rte_pci_device
> > > +*pci_dev = RTE_ETH_DEV_TO_PCI(dev); char
> > > +name[IXGBE_TASK_THREAD_NAME_LEN];
> > > +int ret;
> > > +
> > > +TAILQ_INIT(&ad->task_head);
> > > +pthread_mutex_init(&ad->task_lock, NULL);
> > > +pthread_cond_init(&ad->task_cond, NULL);
> > > +
> > > +snprintf(name, IXGBE_TASK_THREAD_NAME_LEN, "ixgbe-delay:%s",
> > > +pci_dev->name);
> > > +
> > > +ret = rte_ctrl_thread_create(&ad->task_tid, name, NULL,
> > > +     ixgbe_task_handler, dev);
> > > +if (ret < 0) {
> > > +PMD_INIT_LOG(ERR, "Create control thread %s fail!", name); return
> > > +ret; }
> > > +ad->task_status = IXGBE_TASK_THREAD_RUNNING;
> > > +
> > > +return 0;
> > > +}
> > > +
> > > +static void ixgbe_task_thread_uninit(struct rte_eth_dev *dev) {
> > > +struct ixgbe_adapter *ad = dev->data->dev_private; int i; #define
> > > +MAX_EXIT_TIMEOUT  3000/* 3s */
> > > +
> > > +if (ad->task_status == IXGBE_TASK_THREAD_RUNNING) {
> > > +ad->task_status = IXGBE_TASK_THREAD_EXIT;
> > > +pthread_cond_signal(&ad->task_cond);
> > > +for (i = 0; i < MAX_EXIT_TIMEOUT; i++) { if (ad->task_status ==
> > > +IXGBE_TASK_THREAD_EXIT_DONE) { pthread_cond_destroy(&ad-
> > >task_cond);
> > > +pthread_mutex_destroy(&ad->task_lock);
> > > +break;
> > > +}
> > > +rte_delay_us_sleep(1000);// 1ms
> > > +}
> > > +}
> > > +}
> > > +
> > > +/*
> > >   * This function is based on code in ixgbe_attach() in base/ixgbe.c.
> > >   * It returns 0 on success.
> > >   */
> > > @@ -1301,6 +1470,7 @@ struct rte_ixgbe_xstats_name_off {
> > >  /* enable support intr */
> > >  ixgbe_enable_intr(eth_dev);
> > >
> > > +ixgbe_task_thread_init(eth_dev);
> > >  ixgbe_dev_set_link_down(eth_dev);
> > >
> > >  /* initialize filter info */
> > > @@ -1337,6 +1507,7 @@ struct rte_ixgbe_xstats_name_off {  return 0;
> > >
> > >  ixgbe_dev_close(eth_dev);
> > > +ixgbe_task_thread_uninit(eth_dev);
> > >
> > >  return 0;
> > >  }
> > > @@ -1713,6 +1884,7 @@ static int ixgbe_l2_tn_filter_init(struct
> > rte_eth_dev *eth_dev)
> > >     ixgbevf_dev_interrupt_handler, eth_dev);
> > > rte_intr_enable(intr_handle);  ixgbevf_intr_enable(eth_dev);
> > > +ixgbe_task_thread_init(eth_dev);
> > >
> > >  PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x
> > mac.type=%s",
> > >       eth_dev->data->port_id, pci_dev->id.vendor_id, @@ -1732,6
> > > +1904,7 @@ static int ixgbe_l2_tn_filter_init(struct rte_eth_dev
> > > *eth_dev)  return 0;
> > >
> > >  ixgbevf_dev_close(eth_dev);
> > > +ixgbe_task_thread_uninit(eth_dev);
> > >
> > >  return 0;
> > >  }
> > > @@ -2570,7 +2743,7 @@ static int eth_ixgbevf_pci_remove(struct
> > > rte_pci_device *pci_dev)  }
> > >
> > >  /* Stop the link setup handler before resetting the HW. */
> > > -rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev);
> > > +ixgbe_cancel_task(dev, ixgbe_dev_setup_link_alarm_handler);
> > >
> > >  /* disable uio/vfio intr/eventfd mapping */
> > > rte_intr_disable(intr_handle); @@ -2842,7 +3015,7 @@ static int
> > > eth_ixgbevf_pci_remove(struct rte_pci_device *pci_dev)
> > >
> > >  PMD_INIT_FUNC_TRACE();
> > >
> > > -rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev);
> > > +ixgbe_cancel_task(dev, ixgbe_dev_setup_link_alarm_handler);
> > >
> > >  /* disable interrupts */
> > >  ixgbe_disable_intr(hw);
> > > @@ -4162,8 +4335,7 @@ static int
> > > ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
> > if
> > > (link_up == 0) {  if (ixgbe_get_media_type(hw) ==
> > > ixgbe_media_type_fiber) {  intr->flags |=
> > IXGBE_FLAG_NEED_LINK_CONFIG;
> > > -rte_eal_alarm_set(10, -ixgbe_dev_setup_link_alarm_handler, dev);
> > > +ixgbe_add_task(dev, ixgbe_dev_setup_link_alarm_handler);
> > >  }
> > >  return rte_eth_linkstatus_set(dev, &link);  } @@ -5207,7 +5379,7 @@
> > > static int ixgbevf_dev_xstats_get_names(__rte_unused struct
> > > rte_eth_dev *dev,  PMD_INIT_FUNC_TRACE();
> > >
> > >  /* Stop the link setup handler before resetting the HW. */
> > > -rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev);
> > > +ixgbe_cancel_task(dev, ixgbe_dev_setup_link_alarm_handler);
> > >
> > >  err = hw->mac.ops.reset_hw(hw);
> > >  if (err) {
> > > @@ -5305,7 +5477,7 @@ static int
> > > ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
> > >
> > >  PMD_INIT_FUNC_TRACE();
> > >
> > > -rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev);
> > > +ixgbe_cancel_task(dev, ixgbe_dev_setup_link_alarm_handler);
> > >
> > >  ixgbevf_intr_disable(dev);
> > >
> > > diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h
> > > b/drivers/net/ixgbe/ixgbe_ethdev.h
> > > index 76a1b9d..4426349 100644
> > > --- a/drivers/net/ixgbe/ixgbe_ethdev.h
> > > +++ b/drivers/net/ixgbe/ixgbe_ethdev.h
> > > @@ -470,6 +470,28 @@ struct ixgbe_tm_conf {  bool committed;  };
> > >
> > > +#define IXGBE_TASK_THREAD_NAME_LEN  64 enum
> > { IXGBE_TASK_THREAD_EXIT
> > > += 0, IXGBE_TASK_THREAD_RUNNING = 1,
> > IXGBE_TASK_THREAD_EXIT_DONE = 2
> > > +}; enum { IXGBE_TASK_READY = 0, IXGBE_TASK_RUNNING = 1,
> > > +IXGBE_TASK_FINISH = 2 };
> > > +
> > > +typedef void (*ixgbe_task_cb_fn) (void *arg); struct ixgbe_task {
> > > +TAILQ_ENTRY(ixgbe_task) next;
> > > +void *arg;
> > > +int status;
> > > +ixgbe_task_cb_fn task_cb;
> > > +};
> > > +TAILQ_HEAD(ixgbe_task_head, ixgbe_task);
> > > +
> > > +
> > >  /*
> > >   * Structure to store private data for each driver instance (for each port).
> > >   */
> > > @@ -510,6 +532,13 @@ struct ixgbe_adapter {
> > >   * mailbox status) link status.
> > >   */
> > >  uint8_t pflink_fullchk;
> > > +
> > > +/* Control thread per VF/PF, used for to do delay work */ pthread_t
> > > +task_tid; struct ixgbe_task_head task_head; int task_status;
> > > +pthread_mutex_t task_lock; pthread_cond_t task_cond;
> > >  };
> > >
> > >  struct ixgbe_vf_representor {
> > > @@ -760,6 +789,9 @@ void ixgbe_dev_macsec_setting_save(struct
> > > rte_eth_dev *dev,  struct ixgbe_macsec_setting *macsec_setting);
> > >
> > >  void ixgbe_dev_macsec_setting_reset(struct rte_eth_dev *dev);
> > > +int ixgbe_add_task(struct rte_eth_dev *dev, ixgbe_task_cb_fn
> > > +task_cb); int ixgbe_cancel_task(struct rte_eth_dev *dev,
> > > +ixgbe_task_cb_fn task_cb);
> > > +
> > >
> > >  static inline int
> > >  ixgbe_ethertype_filter_lookup(struct ixgbe_filter_info *filter_info,
> > > --
> > > 1.8.3.1
> >
>
  

Patch

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 2c6fd0f..f0b387d 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -15,6 +15,7 @@ 
 #include <rte_byteorder.h>
 #include <rte_common.h>
 #include <rte_cycles.h>
+#include <rte_tailq.h>
 
 #include <rte_interrupts.h>
 #include <rte_log.h>
@@ -378,6 +379,9 @@  static int ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
 					 struct rte_eth_udp_tunnel *udp_tunnel);
 static int ixgbe_filter_restore(struct rte_eth_dev *dev);
 static void ixgbe_l2_tunnel_conf(struct rte_eth_dev *dev);
+static int ixgbe_task_thread_init(struct rte_eth_dev *dev);
+static void ixgbe_task_thread_uninit(struct rte_eth_dev *dev);
+
 
 /*
  * Define VF Stats MACRO for Non "cleared on read" register
@@ -1069,6 +1073,171 @@  struct rte_ixgbe_xstats_name_off {
 }
 
 /*
+ * Add a task to task queue tail.
+ */
+int ixgbe_add_task(struct rte_eth_dev *dev, ixgbe_task_cb_fn task_cb)
+{
+	struct ixgbe_adapter *ad = dev->data->dev_private;
+	struct ixgbe_task *task;
+
+	if (ad->task_status == IXGBE_TASK_THREAD_RUNNING) {
+		task = rte_zmalloc("ixgbe", sizeof(struct ixgbe_task), 0);
+		if (task == NULL)
+			return -ENOMEM;
+
+		task->arg = dev;
+		task->task_cb = task_cb;
+		task->status = IXGBE_TASK_READY;
+
+		pthread_mutex_lock(&ad->task_lock);
+		TAILQ_INSERT_TAIL(&ad->task_head, task, next);
+		pthread_cond_signal(&ad->task_cond);
+		pthread_mutex_unlock(&ad->task_lock);
+	} else {
+		return -EPERM;	/* Operation not permitted */
+	}
+
+	return 0;
+}
+
+/*
+ * Sync cancel a task with all @task_cb be exit.
+ */
+int ixgbe_cancel_task(struct rte_eth_dev *dev, ixgbe_task_cb_fn task_cb)
+{
+	struct ixgbe_adapter *ad = dev->data->dev_private;
+	struct ixgbe_task *task, *ttask;
+	int i, executing;
+#define DELAY_TIMEOUT_LOG   2000	// 2s
+#define DELAY_TIMEOUT_MAX   10000	// 10s
+
+	for (i = 0; i < DELAY_TIMEOUT_MAX; i++) {
+		executing = 0;
+		if (ad->task_status == IXGBE_TASK_THREAD_RUNNING) {
+			pthread_mutex_lock(&ad->task_lock);
+			TAILQ_FOREACH_SAFE(task, &ad->task_head, next, ttask) {
+				if (task->task_cb == task_cb) {
+					if (task->status == IXGBE_TASK_RUNNING) {
+						executing++;
+					} else {
+						TAILQ_REMOVE(&ad->task_head, task, next);
+						rte_free(task);
+					}
+				}
+			}
+			pthread_mutex_unlock(&ad->task_lock);
+
+			if (executing) {
+				if (i > DELAY_TIMEOUT_LOG && (i % 1000 == 0)) {
+					PMD_DRV_LOG(WARNING,
+						    "Cannel task time wait %ds!", i / 1000);
+				}
+
+				rte_delay_us_sleep(1000);	// 1ms
+				continue;
+			}
+		}
+		break;
+	}
+
+	if (i == DELAY_TIMEOUT_MAX)
+		return -EBUSY;
+
+	return 0;
+}
+
+/*
+ * Task main thread. Loop until state is set to IXGBE_TASK_THREAD_EXIT.
+ * For each task, set the status to IXGBE_TASK_RUNNING before execution,
+ * execute and then be dequeue.
+ */
+static void *ixgbe_task_handler(void *args)
+{
+	struct ixgbe_adapter *ad =
+	    ((struct rte_eth_dev *)args)->data->dev_private;
+	struct ixgbe_task *task;
+
+	PMD_INIT_LOG(DEBUG, "ixgbe task thread created");
+	while (ad->task_status) {
+		pthread_mutex_lock(&ad->task_lock);
+		if (TAILQ_EMPTY(&ad->task_head)) {
+			pthread_cond_wait(&ad->task_cond, &ad->task_lock);
+			pthread_mutex_unlock(&ad->task_lock);
+			continue;
+		}
+
+		/* pop firt task and run it */
+		task = TAILQ_FIRST(&ad->task_head);
+		task->status = IXGBE_TASK_RUNNING;
+		pthread_mutex_unlock(&ad->task_lock);
+
+		if (task && task->task_cb)
+			task->task_cb(task->arg);
+
+		pthread_mutex_lock(&ad->task_lock);
+		TAILQ_REMOVE(&ad->task_head, task, next);
+		rte_free(task);
+		pthread_mutex_unlock(&ad->task_lock);
+	}
+
+	pthread_mutex_lock(&ad->task_lock);
+	while (!TAILQ_EMPTY(&ad->task_head)) {
+		task = TAILQ_FIRST(&ad->task_head);
+		TAILQ_REMOVE(&ad->task_head, task, next);
+		rte_free(task);
+	}
+	pthread_mutex_unlock(&ad->task_lock);
+	ad->task_status = IXGBE_TASK_THREAD_EXIT_DONE;
+
+	return NULL;
+}
+
+static int ixgbe_task_thread_init(struct rte_eth_dev *dev)
+{
+	struct ixgbe_adapter *ad = dev->data->dev_private;
+	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+	char name[IXGBE_TASK_THREAD_NAME_LEN];
+	int ret;
+
+	TAILQ_INIT(&ad->task_head);
+	pthread_mutex_init(&ad->task_lock, NULL);
+	pthread_cond_init(&ad->task_cond, NULL);
+
+	snprintf(name, IXGBE_TASK_THREAD_NAME_LEN, "ixgbe-delay:%s",
+		 pci_dev->name);
+
+	ret = rte_ctrl_thread_create(&ad->task_tid, name, NULL,
+				     ixgbe_task_handler, dev);
+	if (ret < 0) {
+		PMD_INIT_LOG(ERR, "Create control thread %s fail!", name);
+		return ret;
+	}
+	ad->task_status = IXGBE_TASK_THREAD_RUNNING;
+
+	return 0;
+}
+
+static void ixgbe_task_thread_uninit(struct rte_eth_dev *dev)
+{
+	struct ixgbe_adapter *ad = dev->data->dev_private;
+	int i;
+#define MAX_EXIT_TIMEOUT  3000	/* 3s */
+
+	if (ad->task_status == IXGBE_TASK_THREAD_RUNNING) {
+		ad->task_status = IXGBE_TASK_THREAD_EXIT;
+		pthread_cond_signal(&ad->task_cond);
+		for (i = 0; i < MAX_EXIT_TIMEOUT; i++) {
+			if (ad->task_status == IXGBE_TASK_THREAD_EXIT_DONE) {
+				pthread_cond_destroy(&ad->task_cond);
+				pthread_mutex_destroy(&ad->task_lock);
+				break;
+			}
+			rte_delay_us_sleep(1000);	// 1ms
+		}
+	}
+}
+
+/*
  * This function is based on code in ixgbe_attach() in base/ixgbe.c.
  * It returns 0 on success.
  */
@@ -1301,6 +1470,7 @@  struct rte_ixgbe_xstats_name_off {
 	/* enable support intr */
 	ixgbe_enable_intr(eth_dev);
 
+	ixgbe_task_thread_init(eth_dev);
 	ixgbe_dev_set_link_down(eth_dev);
 
 	/* initialize filter info */
@@ -1337,6 +1507,7 @@  struct rte_ixgbe_xstats_name_off {
 		return 0;
 
 	ixgbe_dev_close(eth_dev);
+	ixgbe_task_thread_uninit(eth_dev);
 
 	return 0;
 }
@@ -1713,6 +1884,7 @@  static int ixgbe_l2_tn_filter_init(struct rte_eth_dev *eth_dev)
 				   ixgbevf_dev_interrupt_handler, eth_dev);
 	rte_intr_enable(intr_handle);
 	ixgbevf_intr_enable(eth_dev);
+	ixgbe_task_thread_init(eth_dev);
 
 	PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x mac.type=%s",
 		     eth_dev->data->port_id, pci_dev->id.vendor_id,
@@ -1732,6 +1904,7 @@  static int ixgbe_l2_tn_filter_init(struct rte_eth_dev *eth_dev)
 		return 0;
 
 	ixgbevf_dev_close(eth_dev);
+	ixgbe_task_thread_uninit(eth_dev);
 
 	return 0;
 }
@@ -2570,7 +2743,7 @@  static int eth_ixgbevf_pci_remove(struct rte_pci_device *pci_dev)
 	}
 
 	/* Stop the link setup handler before resetting the HW. */
-	rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev);
+	ixgbe_cancel_task(dev, ixgbe_dev_setup_link_alarm_handler);
 
 	/* disable uio/vfio intr/eventfd mapping */
 	rte_intr_disable(intr_handle);
@@ -2842,7 +3015,7 @@  static int eth_ixgbevf_pci_remove(struct rte_pci_device *pci_dev)
 
 	PMD_INIT_FUNC_TRACE();
 
-	rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev);
+	ixgbe_cancel_task(dev, ixgbe_dev_setup_link_alarm_handler);
 
 	/* disable interrupts */
 	ixgbe_disable_intr(hw);
@@ -4162,8 +4335,7 @@  static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	if (link_up == 0) {
 		if (ixgbe_get_media_type(hw) == ixgbe_media_type_fiber) {
 			intr->flags |= IXGBE_FLAG_NEED_LINK_CONFIG;
-			rte_eal_alarm_set(10,
-				ixgbe_dev_setup_link_alarm_handler, dev);
+			ixgbe_add_task(dev, ixgbe_dev_setup_link_alarm_handler);
 		}
 		return rte_eth_linkstatus_set(dev, &link);
 	}
@@ -5207,7 +5379,7 @@  static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	PMD_INIT_FUNC_TRACE();
 
 	/* Stop the link setup handler before resetting the HW. */
-	rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev);
+	ixgbe_cancel_task(dev, ixgbe_dev_setup_link_alarm_handler);
 
 	err = hw->mac.ops.reset_hw(hw);
 	if (err) {
@@ -5305,7 +5477,7 @@  static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 
 	PMD_INIT_FUNC_TRACE();
 
-	rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev);
+	ixgbe_cancel_task(dev, ixgbe_dev_setup_link_alarm_handler);
 
 	ixgbevf_intr_disable(dev);
 
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 76a1b9d..4426349 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -470,6 +470,28 @@  struct ixgbe_tm_conf {
 	bool committed;
 };
 
+#define IXGBE_TASK_THREAD_NAME_LEN  64
+enum {
+	IXGBE_TASK_THREAD_EXIT = 0,
+	IXGBE_TASK_THREAD_RUNNING = 1,
+	IXGBE_TASK_THREAD_EXIT_DONE = 2
+};
+enum {
+	IXGBE_TASK_READY = 0,
+	IXGBE_TASK_RUNNING = 1,
+	IXGBE_TASK_FINISH = 2
+};
+
+typedef void (*ixgbe_task_cb_fn) (void *arg);
+struct ixgbe_task {
+	TAILQ_ENTRY(ixgbe_task) next;
+	void *arg;
+	int status;
+	ixgbe_task_cb_fn task_cb;
+};
+TAILQ_HEAD(ixgbe_task_head, ixgbe_task);
+
+
 /*
  * Structure to store private data for each driver instance (for each port).
  */
@@ -510,6 +532,13 @@  struct ixgbe_adapter {
 	 * mailbox status) link status.
 	 */
 	uint8_t pflink_fullchk;
+
+	/* Control thread per VF/PF, used for to do delay work */
+	pthread_t task_tid;
+	struct ixgbe_task_head task_head;
+	int task_status;
+	pthread_mutex_t task_lock;
+	pthread_cond_t task_cond;
 };
 
 struct ixgbe_vf_representor {
@@ -760,6 +789,9 @@  void ixgbe_dev_macsec_setting_save(struct rte_eth_dev *dev,
 		struct ixgbe_macsec_setting *macsec_setting);
 
 void ixgbe_dev_macsec_setting_reset(struct rte_eth_dev *dev);
+int ixgbe_add_task(struct rte_eth_dev *dev, ixgbe_task_cb_fn task_cb);
+int ixgbe_cancel_task(struct rte_eth_dev *dev, ixgbe_task_cb_fn task_cb);
+
 
 static inline int
 ixgbe_ethertype_filter_lookup(struct ixgbe_filter_info *filter_info,