[v2,14/17] net/mlx5: add async action push and pull support

Message ID 20220928033130.9106-15-suanmingm@nvidia.com (mailing list archive)
State Superseded, archived
Delegated to: Raslan Darawsheh
Series: net/mlx5: HW steering PMD update

Checks

ci/checkpatch: success (coding style OK)

Commit Message

Suanming Mou Sept. 28, 2022, 3:31 a.m. UTC
  The queue-based rte_flow_async_action_* functions work the same way as the
queue-based async flow functions: the operations can be pushed
asynchronously, and so can the pull.

This commit adds the missing push and pull support for async actions.
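
For context, a minimal application-side sketch of the flow this commit enables
(the port/queue IDs and the abbreviated conntrack profile below are
placeholders, not taken from this patch):

#include <rte_flow.h>

/*
 * Sketch: enqueue an indirect CONNTRACK action creation on a flow queue with
 * the doorbell postponed, then push and pull the completion. A real profile
 * needs the full TCP state; only a couple of fields are filled here.
 */
static int
async_ct_create_example(uint16_t port_id, uint32_t queue_id)
{
	const struct rte_flow_op_attr op_attr = { .postpone = 1 };
	const struct rte_flow_indir_action_conf ind_conf = { .ingress = 1 };
	struct rte_flow_action_conntrack profile = {
		.peer_port = port_id,
		.is_original_dir = 1,
		.enable = 1,
	};
	const struct rte_flow_action action = {
		.type = RTE_FLOW_ACTION_TYPE_CONNTRACK,
		.conf = &profile,
	};
	struct rte_flow_action_handle *handle;
	struct rte_flow_op_result res[4];
	struct rte_flow_error error;
	int n;

	/* Enqueue the creation; with .postpone = 1 nothing hits HW yet. */
	handle = rte_flow_async_action_handle_create(port_id, queue_id,
						     &op_attr, &ind_conf,
						     &action, NULL, &error);
	if (handle == NULL)
		return -1;
	/* Ring the doorbell for every operation queued so far. */
	if (rte_flow_push(port_id, queue_id, &error) < 0)
		return -1;
	/* Poll the same queue for the completion. */
	do {
		n = rte_flow_pull(port_id, queue_id, res, 4, &error);
	} while (n == 0);
	return (n > 0 && res[0].status == RTE_FLOW_OP_SUCCESS) ? 0 : -1;
}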

Signed-off-by: Suanming Mou <suanmingm@nvidia.com>
---
 drivers/net/mlx5/mlx5.h            |  62 ++++-
 drivers/net/mlx5/mlx5_flow.c       |  45 ++++
 drivers/net/mlx5/mlx5_flow.h       |  17 ++
 drivers/net/mlx5/mlx5_flow_aso.c   | 181 +++++++++++--
 drivers/net/mlx5/mlx5_flow_dv.c    |   7 +-
 drivers/net/mlx5/mlx5_flow_hw.c    | 412 +++++++++++++++++++++++++----
 drivers/net/mlx5/mlx5_flow_meter.c |   6 +-
 7 files changed, 626 insertions(+), 104 deletions(-)
  
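The other user-visible addition is the asynchronous query path wired up as
.async_action_handle_query in the patch below. A hedged sketch of that side,
assuming a conntrack action handle obtained as in the previous example (again
with placeholder port/queue IDs):

#include <rte_flow.h>

/*
 * Sketch: enqueue a query of an existing conntrack action handle, push the
 * queue and wait for the result. 'profile' is the output buffer that the
 * completion fills in.
 */
static int
async_ct_query_example(uint16_t port_id, uint32_t queue_id,
		       struct rte_flow_action_handle *handle,
		       struct rte_flow_action_conntrack *profile)
{
	const struct rte_flow_op_attr op_attr = { .postpone = 1 };
	struct rte_flow_op_result res[4];
	struct rte_flow_error error;
	int n;

	/* Enqueue the query; completion is reported by rte_flow_pull(). */
	if (rte_flow_async_action_handle_query(port_id, queue_id, &op_attr,
					       handle, profile, NULL, &error))
		return -1;
	if (rte_flow_push(port_id, queue_id, &error) < 0)
		return -1;
	do {
		n = rte_flow_pull(port_id, queue_id, res, 4, &error);
	} while (n == 0);
	if (n < 0 || res[0].status != RTE_FLOW_OP_SUCCESS)
		return -1;
	/* 'profile' now holds the state read back from the ASO object. */
	return 0;
}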

Patch

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index eca719f269..5d92df8965 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -341,6 +341,8 @@  struct mlx5_lb_ctx {
 enum {
 	MLX5_HW_Q_JOB_TYPE_CREATE, /* Flow create job type. */
 	MLX5_HW_Q_JOB_TYPE_DESTROY, /* Flow destroy job type. */
+	MLX5_HW_Q_JOB_TYPE_UPDATE,
+	MLX5_HW_Q_JOB_TYPE_QUERY,
 };
 
 #define MLX5_HW_MAX_ITEMS (16)
@@ -348,12 +350,23 @@  enum {
 /* HW steering flow management job descriptor. */
 struct mlx5_hw_q_job {
 	uint32_t type; /* Job type. */
-	struct rte_flow_hw *flow; /* Flow attached to the job. */
+	union {
+		struct rte_flow_hw *flow; /* Flow attached to the job. */
+		const void *action; /* Indirect action attached to the job. */
+	};
 	void *user_data; /* Job user data. */
 	uint8_t *encap_data; /* Encap data. */
 	struct mlx5_modification_cmd *mhdr_cmd;
 	struct rte_flow_item *items;
-	struct rte_flow_item_ethdev port_spec;
+	union {
+		struct {
+			/* Pointer to ct query user memory. */
+			struct rte_flow_action_conntrack *profile;
+			/* Pointer to ct ASO query out memory. */
+			void *out_data;
+		} __rte_packed;
+		struct rte_flow_item_ethdev port_spec;
+	} __rte_packed;
 };
 
 /* HW steering job descriptor LIFO pool. */
@@ -361,6 +374,8 @@  struct mlx5_hw_q {
 	uint32_t job_idx; /* Free job index. */
 	uint32_t size; /* LIFO size. */
 	struct mlx5_hw_q_job **job; /* LIFO header. */
+	struct rte_ring *indir_cq; /* Indirect action SW completion queue. */
+	struct rte_ring *indir_iq; /* Indirect action SW in progress queue. */
 } __rte_cache_aligned;
 
 
@@ -569,6 +584,7 @@  struct mlx5_aso_sq_elem {
 			struct mlx5_aso_ct_action *ct;
 			char *query_data;
 		};
+		void *user_data;
 	};
 };
 
@@ -578,7 +594,9 @@  struct mlx5_aso_sq {
 	struct mlx5_aso_cq cq;
 	struct mlx5_devx_sq sq_obj;
 	struct mlx5_pmd_mr mr;
+	volatile struct mlx5_aso_wqe *db;
 	uint16_t pi;
+	uint16_t db_pi;
 	uint32_t head;
 	uint32_t tail;
 	uint32_t sqn;
@@ -993,6 +1011,7 @@  struct mlx5_flow_meter_profile {
 enum mlx5_aso_mtr_state {
 	ASO_METER_FREE, /* In free list. */
 	ASO_METER_WAIT, /* ACCESS_ASO WQE in progress. */
+	ASO_METER_WAIT_ASYNC, /* CQE will be handled by async pull. */
 	ASO_METER_READY, /* CQE received. */
 };
 
@@ -1195,6 +1214,7 @@  struct mlx5_bond_info {
 enum mlx5_aso_ct_state {
 	ASO_CONNTRACK_FREE, /* Inactive, in the free list. */
 	ASO_CONNTRACK_WAIT, /* WQE sent in the SQ. */
+	ASO_CONNTRACK_WAIT_ASYNC, /* CQE will be handled by async pull. */
 	ASO_CONNTRACK_READY, /* CQE received w/o error. */
 	ASO_CONNTRACK_QUERY, /* WQE for query sent. */
 	ASO_CONNTRACK_MAX, /* Guard. */
@@ -1203,13 +1223,21 @@  enum mlx5_aso_ct_state {
 /* Generic ASO connection tracking structure. */
 struct mlx5_aso_ct_action {
 	union {
-		LIST_ENTRY(mlx5_aso_ct_action) next;
-		/* Pointer to the next ASO CT. Used only in SWS. */
-		struct mlx5_aso_ct_pool *pool;
-		/* Pointer to action pool. Used only in HWS. */
+		/* SWS mode struct. */
+		struct {
+			/* Pointer to the next ASO CT. Used only in SWS. */
+			LIST_ENTRY(mlx5_aso_ct_action) next;
+		};
+		/* HWS mode struct. */
+		struct {
+			/* Pointer to action pool. Used only in HWS. */
+			struct mlx5_aso_ct_pool *pool;
+		};
 	};
-	void *dr_action_orig; /* General action object for original dir. */
-	void *dr_action_rply; /* General action object for reply dir. */
+	/* General action object for original dir. */
+	void *dr_action_orig;
+	/* General action object for reply dir. */
+	void *dr_action_rply;
 	uint32_t refcnt; /* Action used count in device flows. */
 	uint16_t offset; /* Offset of ASO CT in DevX objects bulk. */
 	uint16_t peer; /* The only peer port index could also use this CT. */
@@ -2135,18 +2163,21 @@  int mlx5_aso_flow_hit_queue_poll_stop(struct mlx5_dev_ctx_shared *sh);
 void mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
 			   enum mlx5_access_aso_opc_mod aso_opc_mod);
 int mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh, uint32_t queue,
-				 struct mlx5_aso_mtr *mtr,
-				 struct mlx5_mtr_bulk *bulk);
+		struct mlx5_aso_mtr *mtr, struct mlx5_mtr_bulk *bulk,
+		void *user_data, bool push);
 int mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh, uint32_t queue,
 		struct mlx5_aso_mtr *mtr);
 int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh, uint32_t queue,
 			      struct mlx5_aso_ct_action *ct,
-			      const struct rte_flow_action_conntrack *profile);
+			      const struct rte_flow_action_conntrack *profile,
+			      void *user_data,
+			      bool push);
 int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh, uint32_t queue,
 			   struct mlx5_aso_ct_action *ct);
 int mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh, uint32_t queue,
 			     struct mlx5_aso_ct_action *ct,
-			     struct rte_flow_action_conntrack *profile);
+			     struct rte_flow_action_conntrack *profile,
+			     void *user_data, bool push);
 int mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh, uint32_t queue,
 			  struct mlx5_aso_ct_action *ct);
 uint32_t
@@ -2154,6 +2185,13 @@  mlx5_get_supported_sw_parsing_offloads(const struct mlx5_hca_attr *attr);
 uint32_t
 mlx5_get_supported_tunneling_offloads(const struct mlx5_hca_attr *attr);
 
+void mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
+			     char *wdata);
+void mlx5_aso_push_wqe(struct mlx5_dev_ctx_shared *sh,
+		       struct mlx5_aso_sq *sq);
+int mlx5_aso_pull_completion(struct mlx5_aso_sq *sq,
+			     struct rte_flow_op_result res[],
+			     uint16_t n_res);
 int mlx5_aso_cnt_queue_init(struct mlx5_dev_ctx_shared *sh);
 void mlx5_aso_cnt_queue_uninit(struct mlx5_dev_ctx_shared *sh);
 int mlx5_aso_cnt_query(struct mlx5_dev_ctx_shared *sh,
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 4bfa604578..bc2ccb4d3c 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -979,6 +979,14 @@  mlx5_flow_async_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
 				  void *user_data,
 				  struct rte_flow_error *error);
 
+static int
+mlx5_flow_async_action_handle_query(struct rte_eth_dev *dev, uint32_t queue,
+				 const struct rte_flow_op_attr *attr,
+				 const struct rte_flow_action_handle *handle,
+				 void *data,
+				 void *user_data,
+				 struct rte_flow_error *error);
+
 static const struct rte_flow_ops mlx5_flow_ops = {
 	.validate = mlx5_flow_validate,
 	.create = mlx5_flow_create,
@@ -1015,6 +1023,7 @@  static const struct rte_flow_ops mlx5_flow_ops = {
 	.push = mlx5_flow_push,
 	.async_action_handle_create = mlx5_flow_async_action_handle_create,
 	.async_action_handle_update = mlx5_flow_async_action_handle_update,
+	.async_action_handle_query = mlx5_flow_async_action_handle_query,
 	.async_action_handle_destroy = mlx5_flow_async_action_handle_destroy,
 };
 
@@ -8858,6 +8867,42 @@  mlx5_flow_async_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
 					 update, user_data, error);
 }
 
+/**
+ * Query shared action.
+ *
+ * @param[in] dev
+ *   Pointer to the rte_eth_dev structure.
+ * @param[in] queue
+ *   Which queue to be used.
+ * @param[in] attr
+ *   Operation attribute.
+ * @param[in] handle
+ *   Action handle to be queried.
+ * @param[in] data
+ *   Pointer to the query result data.
+ * @param[in] user_data
+ *   Pointer to the user_data.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, negative value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_async_action_handle_query(struct rte_eth_dev *dev, uint32_t queue,
+				    const struct rte_flow_op_attr *attr,
+				    const struct rte_flow_action_handle *handle,
+				    void *data,
+				    void *user_data,
+				    struct rte_flow_error *error)
+{
+	const struct mlx5_flow_driver_ops *fops =
+			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
+
+	return fops->async_action_query(dev, queue, attr, handle,
+					data, user_data, error);
+}
+
 /**
  * Destroy shared action.
  *
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 30a18ea35e..e45869a890 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -57,6 +57,13 @@  enum mlx5_rte_flow_field_id {
 
 #define MLX5_INDIRECT_ACTION_TYPE_OFFSET 29
 
+#define MLX5_INDIRECT_ACTION_TYPE_GET(handle) \
+	(((uint32_t)(uintptr_t)(handle)) >> MLX5_INDIRECT_ACTION_TYPE_OFFSET)
+
+#define MLX5_INDIRECT_ACTION_IDX_GET(handle) \
+	(((uint32_t)(uintptr_t)(handle)) & \
+	 ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1))
+
 enum {
 	MLX5_INDIRECT_ACTION_TYPE_RSS,
 	MLX5_INDIRECT_ACTION_TYPE_AGE,
@@ -1816,6 +1823,15 @@  typedef int (*mlx5_flow_async_action_handle_update_t)
 			 void *user_data,
 			 struct rte_flow_error *error);
 
+typedef int (*mlx5_flow_async_action_handle_query_t)
+			(struct rte_eth_dev *dev,
+			 uint32_t queue,
+			 const struct rte_flow_op_attr *attr,
+			 const struct rte_flow_action_handle *handle,
+			 void *data,
+			 void *user_data,
+			 struct rte_flow_error *error);
+
 typedef int (*mlx5_flow_async_action_handle_destroy_t)
 			(struct rte_eth_dev *dev,
 			 uint32_t queue,
@@ -1878,6 +1894,7 @@  struct mlx5_flow_driver_ops {
 	mlx5_flow_push_t push;
 	mlx5_flow_async_action_handle_create_t async_action_create;
 	mlx5_flow_async_action_handle_update_t async_action_update;
+	mlx5_flow_async_action_handle_query_t async_action_query;
 	mlx5_flow_async_action_handle_destroy_t async_action_destroy;
 };
 
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index f371fff2e2..43ef893e9d 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -519,6 +519,70 @@  mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq)
 			       (volatile uint32_t *)&sq->sq_obj.aso_wqes[idx]);
 }
 
+int
+mlx5_aso_pull_completion(struct mlx5_aso_sq *sq,
+			 struct rte_flow_op_result res[],
+			 uint16_t n_res)
+{
+	struct mlx5_aso_cq *cq = &sq->cq;
+	volatile struct mlx5_cqe *restrict cqe;
+	const uint32_t cq_size = 1 << cq->log_desc_n;
+	const uint32_t mask = cq_size - 1;
+	uint32_t idx;
+	uint32_t next_idx;
+	uint16_t max;
+	uint16_t n = 0;
+	int ret;
+
+	max = (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!max || !n_res))
+		return 0;
+	next_idx = cq->cq_ci & mask;
+	do {
+		idx = next_idx;
+		next_idx = (cq->cq_ci + 1) & mask;
+		/* Need to confirm the position of the prefetch. */
+		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
+		cqe = &cq->cq_obj.cqes[idx];
+		ret = check_cqe(cqe, cq_size, cq->cq_ci);
+		/*
+		 * Be sure owner read is done before any other cookie field or
+		 * opaque field.
+		 */
+		rte_io_rmb();
+		if (ret == MLX5_CQE_STATUS_HW_OWN)
+			break;
+		res[n].user_data = sq->elts[(uint16_t)((sq->tail + n) & mask)].user_data;
+		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
+			mlx5_aso_cqe_err_handle(sq);
+			res[n].status = RTE_FLOW_OP_ERROR;
+		} else {
+			res[n].status = RTE_FLOW_OP_SUCCESS;
+		}
+		cq->cq_ci++;
+		if (++n == n_res)
+			break;
+	} while (1);
+	if (likely(n)) {
+		sq->tail += n;
+		rte_io_wmb();
+		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
+	}
+	return n;
+}
+
+void
+mlx5_aso_push_wqe(struct mlx5_dev_ctx_shared *sh,
+		  struct mlx5_aso_sq *sq)
+{
+	if (sq->db_pi == sq->pi)
+		return;
+	mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)sq->db,
+			   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
+			   !sh->tx_uar.dbnc);
+	sq->db_pi = sq->pi;
+}
+
 /**
  * Update ASO objects upon completion.
  *
@@ -728,7 +792,9 @@  mlx5_aso_mtr_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
 			       struct mlx5_aso_sq *sq,
 			       struct mlx5_aso_mtr *aso_mtr,
 			       struct mlx5_mtr_bulk *bulk,
-				   bool need_lock)
+			       bool need_lock,
+			       void *user_data,
+			       bool push)
 {
 	volatile struct mlx5_aso_wqe *wqe = NULL;
 	struct mlx5_flow_meter_info *fm = NULL;
@@ -754,7 +820,7 @@  mlx5_aso_mtr_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
 	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
 	/* Fill next WQE. */
 	fm = &aso_mtr->fm;
-	sq->elts[sq->head & mask].mtr = aso_mtr;
+	sq->elts[sq->head & mask].mtr = user_data ? user_data : aso_mtr;
 	if (aso_mtr->type == ASO_METER_INDIRECT) {
 		if (likely(sh->config.dv_flow_en == 2))
 			pool = aso_mtr->pool;
@@ -820,9 +886,13 @@  mlx5_aso_mtr_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
 	 */
 	sq->head++;
 	sq->pi += 2;/* Each WQE contains 2 WQEBB's. */
-	mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
+	if (push) {
+		mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
 			   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
 			   !sh->tx_uar.dbnc);
+		sq->db_pi = sq->pi;
+	}
+	sq->db = wqe;
 	if (need_lock)
 		rte_spinlock_unlock(&sq->sqsl);
 	return 1;
@@ -912,11 +982,14 @@  mlx5_aso_mtr_completion_handle(struct mlx5_aso_sq *sq, bool need_lock)
 int
 mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh, uint32_t queue,
 			struct mlx5_aso_mtr *mtr,
-			struct mlx5_mtr_bulk *bulk)
+			struct mlx5_mtr_bulk *bulk,
+			void *user_data,
+			bool push)
 {
 	struct mlx5_aso_sq *sq;
 	uint32_t poll_wqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;
 	bool need_lock;
+	int ret;
 
 	if (likely(sh->config.dv_flow_en == 2)) {
 		if (queue == MLX5_HW_INV_QUEUE) {
@@ -930,10 +1003,15 @@  mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh, uint32_t queue,
 		sq = &sh->mtrmng->pools_mng.sq;
 		need_lock = true;
 	}
+	if (queue != MLX5_HW_INV_QUEUE) {
+		ret = mlx5_aso_mtr_sq_enqueue_single(sh, sq, mtr, bulk,
+						     need_lock, user_data, push);
+		return ret > 0 ? 0 : -1;
+	}
 	do {
 		mlx5_aso_mtr_completion_handle(sq, need_lock);
-		if (mlx5_aso_mtr_sq_enqueue_single(sh, sq, mtr,
-						   bulk, need_lock))
+		if (mlx5_aso_mtr_sq_enqueue_single(sh, sq, mtr, bulk,
+						   need_lock, NULL, true))
 			return 0;
 		/* Waiting for wqe resource. */
 		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
@@ -962,6 +1040,7 @@  mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh, uint32_t queue,
 {
 	struct mlx5_aso_sq *sq;
 	uint32_t poll_cqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;
+	uint8_t state;
 	bool need_lock;
 
 	if (likely(sh->config.dv_flow_en == 2)) {
@@ -976,8 +1055,8 @@  mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh, uint32_t queue,
 		sq = &sh->mtrmng->pools_mng.sq;
 		need_lock = true;
 	}
-	if (__atomic_load_n(&mtr->state, __ATOMIC_RELAXED) ==
-					    ASO_METER_READY)
+	state = __atomic_load_n(&mtr->state, __ATOMIC_RELAXED);
+	if (state == ASO_METER_READY || state == ASO_METER_WAIT_ASYNC)
 		return 0;
 	do {
 		mlx5_aso_mtr_completion_handle(sq, need_lock);
@@ -1093,7 +1172,9 @@  mlx5_aso_ct_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
 			      struct mlx5_aso_sq *sq,
 			      struct mlx5_aso_ct_action *ct,
 			      const struct rte_flow_action_conntrack *profile,
-			      bool need_lock)
+			      bool need_lock,
+			      void *user_data,
+			      bool push)
 {
 	volatile struct mlx5_aso_wqe *wqe = NULL;
 	uint16_t size = 1 << sq->log_desc_n;
@@ -1117,10 +1198,16 @@  mlx5_aso_ct_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
 	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
 	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
 	/* Fill next WQE. */
-	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
-	sq->elts[sq->head & mask].ct = ct;
-	sq->elts[sq->head & mask].query_data = NULL;
+	MLX5_ASO_CT_UPDATE_STATE(ct,
+			user_data ? ASO_CONNTRACK_WAIT_ASYNC : ASO_CONNTRACK_WAIT);
+	if (user_data) {
+		sq->elts[sq->head & mask].user_data = user_data;
+	} else {
+		sq->elts[sq->head & mask].ct = ct;
+		sq->elts[sq->head & mask].query_data = NULL;
+	}
 	pool = __mlx5_aso_ct_get_pool(sh, ct);
+
 	/* Each WQE will have a single CT object. */
 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
 						  ct->offset);
@@ -1200,9 +1287,13 @@  mlx5_aso_ct_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
 		 profile->reply_dir.max_ack);
 	sq->head++;
 	sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
-	mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
-			   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
-			   !sh->tx_uar.dbnc);
+	if (push) {
+		mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
+				   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
+				   !sh->tx_uar.dbnc);
+		sq->db_pi = sq->pi;
+	}
+	sq->db = wqe;
 	if (need_lock)
 		rte_spinlock_unlock(&sq->sqsl);
 	return 1;
@@ -1258,7 +1349,9 @@  static int
 mlx5_aso_ct_sq_query_single(struct mlx5_dev_ctx_shared *sh,
 			    struct mlx5_aso_sq *sq,
 			    struct mlx5_aso_ct_action *ct, char *data,
-			    bool need_lock)
+			    bool need_lock,
+			    void *user_data,
+			    bool push)
 {
 	volatile struct mlx5_aso_wqe *wqe = NULL;
 	uint16_t size = 1 << sq->log_desc_n;
@@ -1284,14 +1377,23 @@  mlx5_aso_ct_sq_query_single(struct mlx5_dev_ctx_shared *sh,
 		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
 		return 0;
 	}
-	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_QUERY);
+	MLX5_ASO_CT_UPDATE_STATE(ct,
+			user_data ? ASO_CONNTRACK_WAIT_ASYNC : ASO_CONNTRACK_QUERY);
 	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
 	/* Confirm the location and address of the prefetch instruction. */
 	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
 	/* Fill next WQE. */
 	wqe_idx = sq->head & mask;
-	sq->elts[wqe_idx].ct = ct;
-	sq->elts[wqe_idx].query_data = data;
+	/* Check if this is async mode. */
+	if (user_data) {
+		struct mlx5_hw_q_job *job = (struct mlx5_hw_q_job *)user_data;
+
+		sq->elts[wqe_idx].ct = user_data;
+		job->out_data = (char *)((uintptr_t)sq->mr.addr + wqe_idx * 64);
+	} else {
+		sq->elts[wqe_idx].query_data = data;
+		sq->elts[wqe_idx].ct = ct;
+	}
 	pool = __mlx5_aso_ct_get_pool(sh, ct);
 	/* Each WQE will have a single CT object. */
 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
@@ -1317,9 +1419,13 @@  mlx5_aso_ct_sq_query_single(struct mlx5_dev_ctx_shared *sh,
 	 * data segment is not used in this case.
 	 */
 	sq->pi += 2;
-	mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
-			   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
-			   !sh->tx_uar.dbnc);
+	if (push) {
+		mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
+				   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
+				   !sh->tx_uar.dbnc);
+		sq->db_pi = sq->pi;
+	}
+	sq->db = wqe;
 	if (need_lock)
 		rte_spinlock_unlock(&sq->sqsl);
 	return 1;
@@ -1405,20 +1511,29 @@  int
 mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 			  uint32_t queue,
 			  struct mlx5_aso_ct_action *ct,
-			  const struct rte_flow_action_conntrack *profile)
+			  const struct rte_flow_action_conntrack *profile,
+			  void *user_data,
+			  bool push)
 {
 	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
 	struct mlx5_aso_ct_pool *pool = __mlx5_aso_ct_get_pool(sh, ct);
 	struct mlx5_aso_sq *sq;
 	bool need_lock = !!(queue == MLX5_HW_INV_QUEUE);
+	int ret;
 
 	if (sh->config.dv_flow_en == 2)
 		sq = __mlx5_aso_ct_get_sq_in_hws(queue, pool);
 	else
 		sq = __mlx5_aso_ct_get_sq_in_sws(sh, ct);
+	if (queue != MLX5_HW_INV_QUEUE) {
+		ret = mlx5_aso_ct_sq_enqueue_single(sh, sq, ct, profile,
+						    need_lock, user_data, push);
+		return ret > 0 ? 0 : -1;
+	}
 	do {
-		mlx5_aso_ct_completion_handle(sh, sq, need_lock);
-		if (mlx5_aso_ct_sq_enqueue_single(sh, sq, ct, profile, need_lock))
+		mlx5_aso_ct_completion_handle(sh, sq,  need_lock);
+		if (mlx5_aso_ct_sq_enqueue_single(sh, sq, ct, profile,
+						  need_lock, NULL, true))
 			return 0;
 		/* Waiting for wqe resource. */
 		rte_delay_us_sleep(10u);
@@ -1478,7 +1593,7 @@  mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh, uint32_t queue,
  * @param[in] wdata
  *   Pointer to data fetched from hardware.
  */
-static inline void
+void
 mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
 			char *wdata)
 {
@@ -1562,7 +1677,8 @@  int
 mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
 			 uint32_t queue,
 			 struct mlx5_aso_ct_action *ct,
-			 struct rte_flow_action_conntrack *profile)
+			 struct rte_flow_action_conntrack *profile,
+			 void *user_data, bool push)
 {
 	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
 	struct mlx5_aso_ct_pool *pool = __mlx5_aso_ct_get_pool(sh, ct);
@@ -1575,9 +1691,15 @@  mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		sq = __mlx5_aso_ct_get_sq_in_hws(queue, pool);
 	else
 		sq = __mlx5_aso_ct_get_sq_in_sws(sh, ct);
+	if (queue != MLX5_HW_INV_QUEUE) {
+		ret = mlx5_aso_ct_sq_query_single(sh, sq, ct, out_data,
+						  need_lock, user_data, push);
+		return ret > 0 ? 0 : -1;
+	}
 	do {
 		mlx5_aso_ct_completion_handle(sh, sq, need_lock);
-		ret = mlx5_aso_ct_sq_query_single(sh, sq, ct, out_data, need_lock);
+		ret = mlx5_aso_ct_sq_query_single(sh, sq, ct, out_data,
+				need_lock, NULL, true);
 		if (ret < 0)
 			return ret;
 		else if (ret > 0)
@@ -1628,7 +1750,8 @@  mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
 		rte_errno = ENXIO;
 		return -rte_errno;
 	} else if (state == ASO_CONNTRACK_READY ||
-		   state == ASO_CONNTRACK_QUERY) {
+		   state == ASO_CONNTRACK_QUERY ||
+		   state == ASO_CONNTRACK_WAIT_ASYNC) {
 		return 0;
 	}
 	do {
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 58a7e94ee0..085cb23c78 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -13091,7 +13091,7 @@  flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to allocate CT object");
 	ct = flow_aso_ct_get_by_dev_idx(dev, idx);
-	if (mlx5_aso_ct_update_by_wqe(sh, MLX5_HW_INV_QUEUE, ct, pro)) {
+	if (mlx5_aso_ct_update_by_wqe(sh, MLX5_HW_INV_QUEUE, ct, pro, NULL, true)) {
 		flow_dv_aso_ct_dev_release(dev, idx);
 		rte_flow_error_set(error, EBUSY,
 				   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
@@ -15904,7 +15904,7 @@  __flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
 		if (ret)
 			return ret;
 		ret = mlx5_aso_ct_update_by_wqe(priv->sh, MLX5_HW_INV_QUEUE,
-						ct, new_prf);
+						ct, new_prf, NULL, true);
 		if (ret)
 			return rte_flow_error_set(error, EIO,
 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -16740,7 +16740,8 @@  flow_dv_action_query(struct rte_eth_dev *dev,
 							ct->peer;
 		((struct rte_flow_action_conntrack *)data)->is_original_dir =
 							ct->is_original;
-		if (mlx5_aso_ct_query_by_wqe(priv->sh, MLX5_HW_INV_QUEUE, ct, data))
+		if (mlx5_aso_ct_query_by_wqe(priv->sh, MLX5_HW_INV_QUEUE, ct,
+					data, NULL, true))
 			return rte_flow_error_set(error, EIO,
 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 					NULL,
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index 5c0981d385..1879c8e9ca 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -1161,9 +1161,9 @@  static rte_be32_t vlan_hdr_to_be32(const struct rte_flow_action *actions)
 }
 
 static __rte_always_inline struct mlx5_aso_mtr *
-flow_hw_meter_mark_alloc(struct rte_eth_dev *dev,
-			   const struct rte_flow_action *action,
-			   uint32_t queue)
+flow_hw_meter_mark_alloc(struct rte_eth_dev *dev, uint32_t queue,
+			 const struct rte_flow_action *action,
+			 void *user_data, bool push)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_mtr_pool *pool = priv->hws_mpool;
@@ -1183,13 +1183,14 @@  flow_hw_meter_mark_alloc(struct rte_eth_dev *dev,
 	fm->is_enable = meter_mark->state;
 	fm->color_aware = meter_mark->color_mode;
 	aso_mtr->pool = pool;
-	aso_mtr->state = ASO_METER_WAIT;
+	aso_mtr->state = (queue == MLX5_HW_INV_QUEUE) ?
+			  ASO_METER_WAIT : ASO_METER_WAIT_ASYNC;
 	aso_mtr->offset = mtr_id - 1;
 	aso_mtr->init_color = (meter_mark->color_mode) ?
 		meter_mark->init_color : RTE_COLOR_GREEN;
 	/* Update ASO flow meter by wqe. */
 	if (mlx5_aso_meter_update_by_wqe(priv->sh, queue, aso_mtr,
-					 &priv->mtr_bulk)) {
+					 &priv->mtr_bulk, user_data, push)) {
 		mlx5_ipool_free(pool->idx_pool, mtr_id);
 		return NULL;
 	}
@@ -1214,7 +1215,7 @@  flow_hw_meter_mark_compile(struct rte_eth_dev *dev,
 	struct mlx5_aso_mtr_pool *pool = priv->hws_mpool;
 	struct mlx5_aso_mtr *aso_mtr;
 
-	aso_mtr = flow_hw_meter_mark_alloc(dev, action, queue);
+	aso_mtr = flow_hw_meter_mark_alloc(dev, queue, action, NULL, true);
 	if (!aso_mtr)
 		return -1;
 
@@ -2278,9 +2279,13 @@  flow_hw_actions_construct(struct rte_eth_dev *dev,
 				rte_col_2_mlx5_col(aso_mtr->init_color);
 			break;
 		case RTE_FLOW_ACTION_TYPE_METER_MARK:
+			/*
+			 * Allocate meter directly will slow down flow
+			 * insertion rate.
+			 */
 			ret = flow_hw_meter_mark_compile(dev,
 				act_data->action_dst, action,
-				rule_acts, &job->flow->mtr_id, queue);
+				rule_acts, &job->flow->mtr_id, MLX5_HW_INV_QUEUE);
 			if (ret != 0)
 				return ret;
 			break;
@@ -2587,6 +2592,74 @@  flow_hw_age_count_release(struct mlx5_priv *priv, uint32_t queue,
 	}
 }
 
+static inline int
+__flow_hw_pull_indir_action_comp(struct rte_eth_dev *dev,
+				 uint32_t queue,
+				 struct rte_flow_op_result res[],
+				 uint16_t n_res)
+
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct rte_ring *r = priv->hw_q[queue].indir_cq;
+	struct mlx5_hw_q_job *job;
+	void *user_data = NULL;
+	uint32_t type, idx;
+	struct mlx5_aso_mtr *aso_mtr;
+	struct mlx5_aso_ct_action *aso_ct;
+	int ret_comp, i;
+
+	ret_comp = (int)rte_ring_count(r);
+	if (ret_comp > n_res)
+		ret_comp = n_res;
+	for (i = 0; i < ret_comp; i++) {
+		rte_ring_dequeue(r, &user_data);
+		res[i].user_data = user_data;
+		res[i].status = RTE_FLOW_OP_SUCCESS;
+	}
+	if (ret_comp < n_res && priv->hws_mpool)
+		ret_comp += mlx5_aso_pull_completion(&priv->hws_mpool->sq[queue],
+				&res[ret_comp], n_res - ret_comp);
+	if (ret_comp < n_res && priv->hws_ctpool)
+		ret_comp += mlx5_aso_pull_completion(&priv->ct_mng->aso_sqs[queue],
+				&res[ret_comp], n_res - ret_comp);
+	for (i = 0; i <  ret_comp; i++) {
+		job = (struct mlx5_hw_q_job *)res[i].user_data;
+		/* Restore user data. */
+		res[i].user_data = job->user_data;
+		if (job->type == MLX5_HW_Q_JOB_TYPE_DESTROY) {
+			type = MLX5_INDIRECT_ACTION_TYPE_GET(job->action);
+			if (type == MLX5_INDIRECT_ACTION_TYPE_METER_MARK) {
+				idx = MLX5_INDIRECT_ACTION_IDX_GET(job->action);
+				mlx5_ipool_free(priv->hws_mpool->idx_pool, idx);
+			}
+		} else if (job->type == MLX5_HW_Q_JOB_TYPE_CREATE) {
+			type = MLX5_INDIRECT_ACTION_TYPE_GET(job->action);
+			if (type == MLX5_INDIRECT_ACTION_TYPE_METER_MARK) {
+				idx = MLX5_INDIRECT_ACTION_IDX_GET(job->action);
+				aso_mtr = mlx5_ipool_get(priv->hws_mpool->idx_pool, idx);
+				aso_mtr->state = ASO_METER_READY;
+			} else if (type == MLX5_INDIRECT_ACTION_TYPE_CT) {
+				idx = MLX5_ACTION_CTX_CT_GET_IDX
+					((uint32_t)(uintptr_t)job->action);
+				aso_ct = mlx5_ipool_get(priv->hws_ctpool->cts, idx);
+				aso_ct->state = ASO_CONNTRACK_READY;
+			}
+		} else if (job->type == MLX5_HW_Q_JOB_TYPE_QUERY) {
+			type = MLX5_INDIRECT_ACTION_TYPE_GET(job->action);
+			if (type == MLX5_INDIRECT_ACTION_TYPE_CT) {
+				idx = MLX5_ACTION_CTX_CT_GET_IDX
+					((uint32_t)(uintptr_t)job->action);
+				aso_ct = mlx5_ipool_get(priv->hws_ctpool->cts, idx);
+				mlx5_aso_ct_obj_analyze(job->profile,
+							job->out_data);
+				aso_ct->state = ASO_CONNTRACK_READY;
+			}
+		}
+		priv->hw_q[queue].job[priv->hw_q[queue].job_idx++] = job;
+	}
+	return ret_comp;
+}
+
 /**
  * Pull the enqueued flows.
  *
@@ -2619,6 +2692,7 @@  flow_hw_pull(struct rte_eth_dev *dev,
 	struct mlx5_hw_q_job *job;
 	int ret, i;
 
+	/* 1. Pull the flow completion. */
 	ret = mlx5dr_send_queue_poll(priv->dr_ctx, queue, res, n_res);
 	if (ret < 0)
 		return rte_flow_error_set(error, rte_errno,
@@ -2644,9 +2718,34 @@  flow_hw_pull(struct rte_eth_dev *dev,
 		}
 		priv->hw_q[queue].job[priv->hw_q[queue].job_idx++] = job;
 	}
+	/* 2. Pull indirect action comp. */
+	if (ret < n_res)
+		ret += __flow_hw_pull_indir_action_comp(dev, queue, &res[ret],
+							n_res - ret);
 	return ret;
 }
 
+static inline void
+__flow_hw_push_action(struct rte_eth_dev *dev,
+		    uint32_t queue)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct rte_ring *iq = priv->hw_q[queue].indir_iq;
+	struct rte_ring *cq = priv->hw_q[queue].indir_cq;
+	void *job = NULL;
+	uint32_t ret, i;
+
+	ret = rte_ring_count(iq);
+	for (i = 0; i < ret; i++) {
+		rte_ring_dequeue(iq, &job);
+		rte_ring_enqueue(cq, job);
+	}
+	if (priv->hws_ctpool)
+		mlx5_aso_push_wqe(priv->sh, &priv->ct_mng->aso_sqs[queue]);
+	if (priv->hws_mpool)
+		mlx5_aso_push_wqe(priv->sh, &priv->hws_mpool->sq[queue]);
+}
+
 /**
  * Push the enqueued flows to HW.
  *
@@ -2670,6 +2769,7 @@  flow_hw_push(struct rte_eth_dev *dev,
 	struct mlx5_priv *priv = dev->data->dev_private;
 	int ret;
 
+	__flow_hw_push_action(dev, queue);
 	ret = mlx5dr_send_queue_action(priv->dr_ctx, queue,
 				       MLX5DR_SEND_QUEUE_ACTION_DRAIN);
 	if (ret) {
@@ -5906,7 +6006,7 @@  flow_hw_configure(struct rte_eth_dev *dev,
 	/* Adds one queue to be used by PMD.
 	 * The last queue will be used by the PMD.
 	 */
-	uint16_t nb_q_updated;
+	uint16_t nb_q_updated = 0;
 	struct rte_flow_queue_attr **_queue_attr = NULL;
 	struct rte_flow_queue_attr ctrl_queue_attr = {0};
 	bool is_proxy = !!(priv->sh->config.dv_esw_en && priv->master);
@@ -5973,6 +6073,7 @@  flow_hw_configure(struct rte_eth_dev *dev,
 		goto err;
 	}
 	for (i = 0; i < nb_q_updated; i++) {
+		char mz_name[RTE_MEMZONE_NAMESIZE];
 		uint8_t *encap = NULL;
 		struct mlx5_modification_cmd *mhdr_cmd = NULL;
 		struct rte_flow_item *items = NULL;
@@ -6000,6 +6101,22 @@  flow_hw_configure(struct rte_eth_dev *dev,
 			job[j].items = &items[j * MLX5_HW_MAX_ITEMS];
 			priv->hw_q[i].job[j] = &job[j];
 		}
+		snprintf(mz_name, sizeof(mz_name), "port_%u_indir_act_cq_%u",
+			 dev->data->port_id, i);
+		priv->hw_q[i].indir_cq = rte_ring_create(mz_name,
+				_queue_attr[i]->size, SOCKET_ID_ANY,
+				RING_F_SP_ENQ | RING_F_SC_DEQ |
+				RING_F_EXACT_SZ);
+		if (!priv->hw_q[i].indir_cq)
+			goto err;
+		snprintf(mz_name, sizeof(mz_name), "port_%u_indir_act_iq_%u",
+			 dev->data->port_id, i);
+		priv->hw_q[i].indir_iq = rte_ring_create(mz_name,
+				_queue_attr[i]->size, SOCKET_ID_ANY,
+				RING_F_SP_ENQ | RING_F_SC_DEQ |
+				RING_F_EXACT_SZ);
+		if (!priv->hw_q[i].indir_iq)
+			goto err;
 	}
 	dr_ctx_attr.pd = priv->sh->cdev->pd;
 	dr_ctx_attr.queues = nb_q_updated;
@@ -6117,6 +6234,12 @@  flow_hw_configure(struct rte_eth_dev *dev,
 	flow_hw_destroy_vlan(dev);
 	if (dr_ctx)
 		claim_zero(mlx5dr_context_close(dr_ctx));
+	for (i = 0; i < nb_q_updated; i++) {
+		if (priv->hw_q[i].indir_iq)
+			rte_ring_free(priv->hw_q[i].indir_iq);
+		if (priv->hw_q[i].indir_cq)
+			rte_ring_free(priv->hw_q[i].indir_cq);
+	}
 	mlx5_free(priv->hw_q);
 	priv->hw_q = NULL;
 	if (priv->acts_ipool) {
@@ -6146,7 +6269,7 @@  flow_hw_resource_release(struct rte_eth_dev *dev)
 	struct rte_flow_template_table *tbl;
 	struct rte_flow_pattern_template *it;
 	struct rte_flow_actions_template *at;
-	int i;
+	uint32_t i;
 
 	if (!priv->dr_ctx)
 		return;
@@ -6192,6 +6315,10 @@  flow_hw_resource_release(struct rte_eth_dev *dev)
 		flow_hw_ct_mng_destroy(dev, priv->ct_mng);
 		priv->ct_mng = NULL;
 	}
+	for (i = 0; i < priv->nb_queue; i++) {
+		rte_ring_free(priv->hw_q[i].indir_iq);
+		rte_ring_free(priv->hw_q[i].indir_cq);
+	}
 	mlx5_free(priv->hw_q);
 	priv->hw_q = NULL;
 	claim_zero(mlx5dr_context_close(priv->dr_ctx));
@@ -6380,8 +6507,9 @@  flow_hw_conntrack_destroy(struct rte_eth_dev *dev __rte_unused,
 }
 
 static int
-flow_hw_conntrack_query(struct rte_eth_dev *dev, uint32_t idx,
+flow_hw_conntrack_query(struct rte_eth_dev *dev, uint32_t queue, uint32_t idx,
 			struct rte_flow_action_conntrack *profile,
+			void *user_data, bool push,
 			struct rte_flow_error *error)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
@@ -6405,7 +6533,7 @@  flow_hw_conntrack_query(struct rte_eth_dev *dev, uint32_t idx,
 	}
 	profile->peer_port = ct->peer;
 	profile->is_original_dir = ct->is_original;
-	if (mlx5_aso_ct_query_by_wqe(priv->sh, MLX5_HW_INV_QUEUE, ct, profile))
+	if (mlx5_aso_ct_query_by_wqe(priv->sh, queue, ct, profile, user_data, push))
 		return rte_flow_error_set(error, EIO,
 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 				NULL,
@@ -6417,7 +6545,8 @@  flow_hw_conntrack_query(struct rte_eth_dev *dev, uint32_t idx,
 static int
 flow_hw_conntrack_update(struct rte_eth_dev *dev, uint32_t queue,
 			 const struct rte_flow_modify_conntrack *action_conf,
-			 uint32_t idx, struct rte_flow_error *error)
+			 uint32_t idx, void *user_data, bool push,
+			 struct rte_flow_error *error)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_ct_pool *pool = priv->hws_ctpool;
@@ -6448,7 +6577,8 @@  flow_hw_conntrack_update(struct rte_eth_dev *dev, uint32_t queue,
 		ret = mlx5_validate_action_ct(dev, new_prf, error);
 		if (ret)
 			return ret;
-		ret = mlx5_aso_ct_update_by_wqe(priv->sh, queue, ct, new_prf);
+		ret = mlx5_aso_ct_update_by_wqe(priv->sh, queue, ct, new_prf,
+						user_data, push);
 		if (ret)
 			return rte_flow_error_set(error, EIO,
 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -6470,6 +6600,7 @@  flow_hw_conntrack_update(struct rte_eth_dev *dev, uint32_t queue,
 static struct rte_flow_action_handle *
 flow_hw_conntrack_create(struct rte_eth_dev *dev, uint32_t queue,
 			 const struct rte_flow_action_conntrack *pro,
+			 void *user_data, bool push,
 			 struct rte_flow_error *error)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
@@ -6496,7 +6627,7 @@  flow_hw_conntrack_create(struct rte_eth_dev *dev, uint32_t queue,
 	ct->is_original = !!pro->is_original_dir;
 	ct->peer = pro->peer_port;
 	ct->pool = pool;
-	if (mlx5_aso_ct_update_by_wqe(priv->sh, queue, ct, pro)) {
+	if (mlx5_aso_ct_update_by_wqe(priv->sh, queue, ct, pro, user_data, push)) {
 		mlx5_ipool_free(pool->cts, ct_idx);
 		rte_flow_error_set(error, EBUSY,
 				   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
@@ -6588,15 +6719,29 @@  flow_hw_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
 			     struct rte_flow_error *error)
 {
 	struct rte_flow_action_handle *handle = NULL;
+	struct mlx5_hw_q_job *job = NULL;
 	struct mlx5_priv *priv = dev->data->dev_private;
 	const struct rte_flow_action_age *age;
 	struct mlx5_aso_mtr *aso_mtr;
 	cnt_id_t cnt_id;
 	uint32_t mtr_id;
 	uint32_t age_idx;
+	bool push = true;
+	bool aso = false;
 
-	RTE_SET_USED(attr);
-	RTE_SET_USED(user_data);
+	if (attr) {
+		MLX5_ASSERT(queue != MLX5_HW_INV_QUEUE);
+		if (unlikely(!priv->hw_q[queue].job_idx)) {
+			rte_flow_error_set(error, ENOMEM,
+				RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				"Flow queue full.");
+			return NULL;
+		}
+		job = priv->hw_q[queue].job[--priv->hw_q[queue].job_idx];
+		job->type = MLX5_HW_Q_JOB_TYPE_CREATE;
+		job->user_data = user_data;
+		push = !attr->postpone;
+	}
 	switch (action->type) {
 	case RTE_FLOW_ACTION_TYPE_AGE:
 		age = action->conf;
@@ -6624,10 +6769,13 @@  flow_hw_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
 				 (uintptr_t)cnt_id;
 		break;
 	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
-		handle = flow_hw_conntrack_create(dev, queue, action->conf, error);
+		aso = true;
+		handle = flow_hw_conntrack_create(dev, queue, action->conf, job,
+						  push, error);
 		break;
 	case RTE_FLOW_ACTION_TYPE_METER_MARK:
-		aso_mtr = flow_hw_meter_mark_alloc(dev, action, queue);
+		aso = true;
+		aso_mtr = flow_hw_meter_mark_alloc(dev, queue, action, job, push);
 		if (!aso_mtr)
 			break;
 		mtr_id = (MLX5_INDIRECT_ACTION_TYPE_METER_MARK <<
@@ -6640,7 +6788,20 @@  flow_hw_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
 	default:
 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
 				   NULL, "action type not supported");
-		return NULL;
+		break;
+	}
+	if (job) {
+		if (!handle) {
+			priv->hw_q[queue].job_idx++;
+			return NULL;
+		}
+		job->action = handle;
+		if (push)
+			__flow_hw_push_action(dev, queue);
+		if (aso)
+			return handle;
+		rte_ring_enqueue(push ? priv->hw_q[queue].indir_cq :
+				 priv->hw_q[queue].indir_iq, job);
 	}
 	return handle;
 }
@@ -6674,32 +6835,56 @@  flow_hw_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
 			     void *user_data,
 			     struct rte_flow_error *error)
 {
-	RTE_SET_USED(attr);
-	RTE_SET_USED(user_data);
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_mtr_pool *pool = priv->hws_mpool;
+	const struct rte_flow_modify_conntrack *ct_conf =
+		(const struct rte_flow_modify_conntrack *)update;
 	const struct rte_flow_update_meter_mark *upd_meter_mark =
 		(const struct rte_flow_update_meter_mark *)update;
 	const struct rte_flow_action_meter_mark *meter_mark;
+	struct mlx5_hw_q_job *job = NULL;
 	struct mlx5_aso_mtr *aso_mtr;
 	struct mlx5_flow_meter_info *fm;
 	uint32_t act_idx = (uint32_t)(uintptr_t)handle;
 	uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
 	uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
+	int ret = 0;
+	bool push = true;
+	bool aso = false;
 
+	if (attr) {
+		MLX5_ASSERT(queue != MLX5_HW_INV_QUEUE);
+		if (unlikely(!priv->hw_q[queue].job_idx))
+			return rte_flow_error_set(error, ENOMEM,
+				RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				"Action update failed due to queue full.");
+		job = priv->hw_q[queue].job[--priv->hw_q[queue].job_idx];
+		job->type = MLX5_HW_Q_JOB_TYPE_UPDATE;
+		job->user_data = user_data;
+		push = !attr->postpone;
+	}
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_AGE:
-		return mlx5_hws_age_action_update(priv, idx, update, error);
+		ret = mlx5_hws_age_action_update(priv, idx, update, error);
+		break;
 	case MLX5_INDIRECT_ACTION_TYPE_CT:
-		return flow_hw_conntrack_update(dev, queue, update, act_idx, error);
+		if (ct_conf->state)
+			aso = true;
+		ret = flow_hw_conntrack_update(dev, queue, update, act_idx,
+					       job, push, error);
+		break;
 	case MLX5_INDIRECT_ACTION_TYPE_METER_MARK:
+		aso = true;
 		meter_mark = &upd_meter_mark->meter_mark;
 		/* Find ASO object. */
 		aso_mtr = mlx5_ipool_get(pool->idx_pool, idx);
-		if (!aso_mtr)
-			return rte_flow_error_set(error, EINVAL,
+		if (!aso_mtr) {
+			ret = -EINVAL;
+			rte_flow_error_set(error, EINVAL,
 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 				NULL, "Invalid meter_mark update index");
+			break;
+		}
 		fm = &aso_mtr->fm;
 		if (upd_meter_mark->profile_valid)
 			fm->profile = (struct mlx5_flow_meter_profile *)
@@ -6713,25 +6898,46 @@  flow_hw_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
 			fm->is_enable = meter_mark->state;
 		/* Update ASO flow meter by wqe. */
 		if (mlx5_aso_meter_update_by_wqe(priv->sh, queue,
-						 aso_mtr, &priv->mtr_bulk))
-			return rte_flow_error_set(error, EINVAL,
+						 aso_mtr, &priv->mtr_bulk, job, push)) {
+			ret = -EINVAL;
+			rte_flow_error_set(error, EINVAL,
 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 				NULL, "Unable to update ASO meter WQE");
+			break;
+		}
 		/* Wait for ASO object completion. */
 		if (queue == MLX5_HW_INV_QUEUE &&
-		    mlx5_aso_mtr_wait(priv->sh, MLX5_HW_INV_QUEUE, aso_mtr))
-			return rte_flow_error_set(error, EINVAL,
+		    mlx5_aso_mtr_wait(priv->sh, MLX5_HW_INV_QUEUE, aso_mtr)) {
+			ret = -EINVAL;
+			rte_flow_error_set(error, EINVAL,
 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 				NULL, "Unable to wait for ASO meter CQE");
+		}
 		break;
 	case MLX5_INDIRECT_ACTION_TYPE_RSS:
-		return flow_dv_action_update(dev, handle, update, error);
+		ret = flow_dv_action_update(dev, handle, update, error);
+		break;
 	default:
-		return rte_flow_error_set(error, ENOTSUP,
+		ret = -ENOTSUP;
+		rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "action type not supported");
+		break;
 	}
-	return 0;
+	if (job) {
+		if (ret) {
+			priv->hw_q[queue].job_idx++;
+			return ret;
+		}
+		job->action = handle;
+		if (push)
+			__flow_hw_push_action(dev, queue);
+		if (aso)
+			return 0;
+		rte_ring_enqueue(push ? priv->hw_q[queue].indir_cq :
+				 priv->hw_q[queue].indir_iq, job);
+	}
+	return ret;
 }
 
 /**
@@ -6766,15 +6972,28 @@  flow_hw_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
 	uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_mtr_pool *pool = priv->hws_mpool;
+	struct mlx5_hw_q_job *job = NULL;
 	struct mlx5_aso_mtr *aso_mtr;
 	struct mlx5_flow_meter_info *fm;
+	bool push = true;
+	bool aso = false;
+	int ret = 0;
 
-	RTE_SET_USED(queue);
-	RTE_SET_USED(attr);
-	RTE_SET_USED(user_data);
+	if (attr) {
+		MLX5_ASSERT(queue != MLX5_HW_INV_QUEUE);
+		if (unlikely(!priv->hw_q[queue].job_idx))
+			return rte_flow_error_set(error, ENOMEM,
+				RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				"Action destroy failed due to queue full.");
+		job = priv->hw_q[queue].job[--priv->hw_q[queue].job_idx];
+		job->type = MLX5_HW_Q_JOB_TYPE_DESTROY;
+		job->user_data = user_data;
+		push = !attr->postpone;
+	}
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_AGE:
-		return mlx5_hws_age_action_destroy(priv, age_idx, error);
+		ret = mlx5_hws_age_action_destroy(priv, age_idx, error);
+		break;
 	case MLX5_INDIRECT_ACTION_TYPE_COUNT:
 		age_idx = mlx5_hws_cnt_age_get(priv->hws_cpool, act_idx);
 		if (age_idx != 0)
@@ -6783,39 +7002,69 @@  flow_hw_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
 			 * time to update the AGE.
 			 */
 			mlx5_hws_age_nb_cnt_decrease(priv, age_idx);
-		return mlx5_hws_cnt_shared_put(priv->hws_cpool, &act_idx);
+		ret = mlx5_hws_cnt_shared_put(priv->hws_cpool, &act_idx);
+		break;
 	case MLX5_INDIRECT_ACTION_TYPE_CT:
-		return flow_hw_conntrack_destroy(dev, act_idx, error);
+		ret = flow_hw_conntrack_destroy(dev, act_idx, error);
+		break;
 	case MLX5_INDIRECT_ACTION_TYPE_METER_MARK:
 		aso_mtr = mlx5_ipool_get(pool->idx_pool, idx);
-		if (!aso_mtr)
-			return rte_flow_error_set(error, EINVAL,
+		if (!aso_mtr) {
+			ret = -EINVAL;
+			rte_flow_error_set(error, EINVAL,
 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 				NULL, "Invalid meter_mark destroy index");
+			break;
+		}
 		fm = &aso_mtr->fm;
 		fm->is_enable = 0;
 		/* Update ASO flow meter by wqe. */
 		if (mlx5_aso_meter_update_by_wqe(priv->sh, queue, aso_mtr,
-						 &priv->mtr_bulk))
-			return rte_flow_error_set(error, EINVAL,
+						 &priv->mtr_bulk, job, push)) {
+			ret = -EINVAL;
+			rte_flow_error_set(error, EINVAL,
 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 				NULL, "Unable to update ASO meter WQE");
+			break;
+		}
 		/* Wait for ASO object completion. */
 		if (queue == MLX5_HW_INV_QUEUE &&
-		    mlx5_aso_mtr_wait(priv->sh, MLX5_HW_INV_QUEUE, aso_mtr))
-			return rte_flow_error_set(error, EINVAL,
+		    mlx5_aso_mtr_wait(priv->sh, MLX5_HW_INV_QUEUE, aso_mtr)) {
+			ret = -EINVAL;
+			rte_flow_error_set(error, EINVAL,
 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 				NULL, "Unable to wait for ASO meter CQE");
-		mlx5_ipool_free(pool->idx_pool, idx);
+			break;
+		}
+		if (!job)
+			mlx5_ipool_free(pool->idx_pool, idx);
+		else
+			aso = true;
 		break;
 	case MLX5_INDIRECT_ACTION_TYPE_RSS:
-		return flow_dv_action_destroy(dev, handle, error);
+		ret = flow_dv_action_destroy(dev, handle, error);
+		break;
 	default:
-		return rte_flow_error_set(error, ENOTSUP,
+		ret = -ENOTSUP;
+		rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "action type not supported");
+		break;
 	}
-	return 0;
+	if (job) {
+		if (ret) {
+			priv->hw_q[queue].job_idx++;
+			return ret;
+		}
+		job->action = handle;
+		if (push)
+			__flow_hw_push_action(dev, queue);
+		if (aso)
+			return ret;
+		rte_ring_enqueue(push ? priv->hw_q[queue].indir_cq :
+				 priv->hw_q[queue].indir_iq, job);
+	}
+	return ret;
 }
 
 static int
@@ -7045,28 +7294,76 @@  flow_hw_action_update(struct rte_eth_dev *dev,
 }
 
 static int
-flow_hw_action_query(struct rte_eth_dev *dev,
-		     const struct rte_flow_action_handle *handle, void *data,
-		     struct rte_flow_error *error)
+flow_hw_action_handle_query(struct rte_eth_dev *dev, uint32_t queue,
+			    const struct rte_flow_op_attr *attr,
+			    const struct rte_flow_action_handle *handle,
+			    void *data, void *user_data,
+			    struct rte_flow_error *error)
 {
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_hw_q_job *job = NULL;
 	uint32_t act_idx = (uint32_t)(uintptr_t)handle;
 	uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
 	uint32_t age_idx = act_idx & MLX5_HWS_AGE_IDX_MASK;
+	int ret;
+	bool push = true;
+	bool aso = false;
 
+	if (attr) {
+		MLX5_ASSERT(queue != MLX5_HW_INV_QUEUE);
+		if (unlikely(!priv->hw_q[queue].job_idx))
+			return rte_flow_error_set(error, ENOMEM,
+				RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				"Action destroy failed due to queue full.");
+		job = priv->hw_q[queue].job[--priv->hw_q[queue].job_idx];
+		job->type = MLX5_HW_Q_JOB_TYPE_QUERY;
+		job->user_data = user_data;
+		push = !attr->postpone;
+	}
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_AGE:
-		return flow_hw_query_age(dev, age_idx, data, error);
+		ret = flow_hw_query_age(dev, age_idx, data, error);
+		break;
 	case MLX5_INDIRECT_ACTION_TYPE_COUNT:
-		return flow_hw_query_counter(dev, act_idx, data, error);
+		ret = flow_hw_query_counter(dev, act_idx, data, error);
+		break;
 	case MLX5_INDIRECT_ACTION_TYPE_CT:
-		return flow_hw_conntrack_query(dev, act_idx, data, error);
-	case MLX5_INDIRECT_ACTION_TYPE_RSS:
-		return flow_dv_action_query(dev, handle, data, error);
+		aso = true;
+		if (job)
+			job->profile = (struct rte_flow_action_conntrack *)data;
+		ret = flow_hw_conntrack_query(dev, queue, act_idx, data,
+					      job, push, error);
+		break;
 	default:
-		return rte_flow_error_set(error, ENOTSUP,
+		ret = -ENOTSUP;
+		rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "action type not supported");
+		break;
+	}
+	if (job) {
+		if (ret) {
+			priv->hw_q[queue].job_idx++;
+			return ret;
+		}
+		job->action = handle;
+		if (push)
+			__flow_hw_push_action(dev, queue);
+		if (aso)
+			return ret;
+		rte_ring_enqueue(push ? priv->hw_q[queue].indir_cq :
+				 priv->hw_q[queue].indir_iq, job);
 	}
+	return 0;
+}
+
+static int
+flow_hw_action_query(struct rte_eth_dev *dev,
+		     const struct rte_flow_action_handle *handle, void *data,
+		     struct rte_flow_error *error)
+{
+	return flow_hw_action_handle_query(dev, MLX5_HW_INV_QUEUE, NULL,
+			handle, data, NULL, error);
 }
 
 /**
@@ -7181,6 +7478,7 @@  const struct mlx5_flow_driver_ops mlx5_flow_hw_drv_ops = {
 	.async_action_create = flow_hw_action_handle_create,
 	.async_action_destroy = flow_hw_action_handle_destroy,
 	.async_action_update = flow_hw_action_handle_update,
+	.async_action_query = flow_hw_action_handle_query,
 	.action_validate = flow_hw_action_validate,
 	.action_create = flow_hw_action_create,
 	.action_destroy = flow_hw_action_destroy,
diff --git a/drivers/net/mlx5/mlx5_flow_meter.c b/drivers/net/mlx5/mlx5_flow_meter.c
index fd1337ae73..480ac6c8ec 100644
--- a/drivers/net/mlx5/mlx5_flow_meter.c
+++ b/drivers/net/mlx5/mlx5_flow_meter.c
@@ -1627,7 +1627,7 @@  mlx5_flow_meter_action_modify(struct mlx5_priv *priv,
 		fm->is_enable = !!is_enable;
 		aso_mtr = container_of(fm, struct mlx5_aso_mtr, fm);
 		ret = mlx5_aso_meter_update_by_wqe(priv->sh, MLX5_HW_INV_QUEUE,
-						   aso_mtr, &priv->mtr_bulk);
+						   aso_mtr, &priv->mtr_bulk, NULL, true);
 		if (ret)
 			return ret;
 		ret = mlx5_aso_mtr_wait(priv->sh, MLX5_HW_INV_QUEUE, aso_mtr);
@@ -1877,7 +1877,7 @@  mlx5_flow_meter_create(struct rte_eth_dev *dev, uint32_t meter_id,
 	if (priv->sh->meter_aso_en) {
 		aso_mtr = container_of(fm, struct mlx5_aso_mtr, fm);
 		ret = mlx5_aso_meter_update_by_wqe(priv->sh, MLX5_HW_INV_QUEUE,
-						   aso_mtr, &priv->mtr_bulk);
+						   aso_mtr, &priv->mtr_bulk, NULL, true);
 		if (ret)
 			goto error;
 		if (!priv->mtr_idx_tbl) {
@@ -1983,7 +1983,7 @@  mlx5_flow_meter_hws_create(struct rte_eth_dev *dev, uint32_t meter_id,
 	fm->initialized = 1;
 	/* Update ASO flow meter by wqe. */
 	ret = mlx5_aso_meter_update_by_wqe(priv->sh, MLX5_HW_INV_QUEUE, aso_mtr,
-					   &priv->mtr_bulk);
+					   &priv->mtr_bulk, NULL, true);
 	if (ret)
 		return -rte_mtr_error_set(error, ENOTSUP,
 			RTE_MTR_ERROR_TYPE_UNSPECIFIED,