[v1,11/19] net/mlx5/hws: Add HWS send layer

Message ID 20220922190345.394-12-valex@nvidia.com (mailing list archive)
State Superseded, archived
Delegated to: Raslan Darawsheh
Headers
Series net/mlx5: Add HW steering low level support |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Alex Vesker Sept. 22, 2022, 7:03 p.m. UTC
  HWS configures flows to the HW using a QP, each WQE has
the details of the flow we want to offload. The send layer
allocates the resources needed to send the request to the HW
as well as managing the queues, getting completions and
handling failures.

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Alex Vesker <valex@nvidia.com>
---
 drivers/net/mlx5/hws/mlx5dr_send.c | 849 +++++++++++++++++++++++++++++
 drivers/net/mlx5/hws/mlx5dr_send.h | 273 ++++++++++
 2 files changed, 1122 insertions(+)
 create mode 100644 drivers/net/mlx5/hws/mlx5dr_send.c
 create mode 100644 drivers/net/mlx5/hws/mlx5dr_send.h
  

Patch

diff --git a/drivers/net/mlx5/hws/mlx5dr_send.c b/drivers/net/mlx5/hws/mlx5dr_send.c
new file mode 100644
index 0000000000..63aba53792
--- /dev/null
+++ b/drivers/net/mlx5/hws/mlx5dr_send.c
@@ -0,0 +1,849 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2022 NVIDIA Corporation & Affiliates
+ */
+
+#include "mlx5dr_internal.h"
+
+struct mlx5dr_send_ring_dep_wqe *
+mlx5dr_send_add_new_dep_wqe(struct mlx5dr_send_engine *queue)
+{	/* Claim the slot at head_dep_idx, advance head and return the slot */
+	struct mlx5dr_send_ring_sq *send_sq = &queue->send_ring->send_sq;
+	unsigned idx = send_sq->head_dep_idx++ & (queue->num_entries - 1); /* mask wraps the free-running index */
+
+	memset(&send_sq->dep_wqe[idx].wqe_data.tag, 0, MLX5DR_MATCH_TAG_SZ); /* only the tag is cleared here */
+
+	return &send_sq->dep_wqe[idx];
+}
+
+void mlx5dr_send_abort_new_dep_wqe(struct mlx5dr_send_engine *queue)
+{	/* Step head_dep_idx back by one, undoing the advance of mlx5dr_send_add_new_dep_wqe() */
+	queue->send_ring->send_sq.head_dep_idx--;
+}
+
+void mlx5dr_send_all_dep_wqe(struct mlx5dr_send_engine *queue)
+{	/* Post every pending dependent WQE (tail up to head) through mlx5dr_send_ste() */
+	struct mlx5dr_send_ring_sq *send_sq = &queue->send_ring->send_sq;
+	struct mlx5dr_send_ste_attr ste_attr = {0};
+	struct mlx5dr_send_ring_dep_wqe *dep_wqe;
+
+	ste_attr.send_attr.opmod = MLX5DR_WQE_GTA_OPMOD_STE;
+	ste_attr.send_attr.opcode = MLX5DR_WQE_OPCODE_TBL_ACCESS;
+	ste_attr.send_attr.len = MLX5DR_WQE_SZ_GTA_CTRL + MLX5DR_WQE_SZ_GTA_DATA;
+	ste_attr.gta_opcode = MLX5DR_WQE_GTA_OP_ACTIVATE;
+
+	/* Fence first from previous depend WQEs */
+	ste_attr.send_attr.fence = 1;
+
+	while (send_sq->head_dep_idx != send_sq->tail_dep_idx) {
+		dep_wqe = &send_sq->dep_wqe[send_sq->tail_dep_idx++ & (queue->num_entries - 1)]; /* consume the tail slot */
+
+		/* Notify HW on the last WQE */
+		ste_attr.send_attr.notify_hw = (send_sq->tail_dep_idx == send_sq->head_dep_idx);
+		ste_attr.send_attr.user_data = dep_wqe->user_data;
+		ste_attr.send_attr.rule = dep_wqe->rule;
+
+		ste_attr.rtc_0 = dep_wqe->rtc_0;
+		ste_attr.rtc_1 = dep_wqe->rtc_1;
+		ste_attr.retry_rtc_0 = dep_wqe->retry_rtc_0;
+		ste_attr.retry_rtc_1 = dep_wqe->retry_rtc_1;
+		ste_attr.used_id_rtc_0 = &dep_wqe->rule->rtc_0;
+		ste_attr.used_id_rtc_1 = &dep_wqe->rule->rtc_1;
+		ste_attr.wqe_ctrl = &dep_wqe->wqe_ctrl;
+		ste_attr.wqe_data = &dep_wqe->wqe_data; /* non-NULL data: mlx5dr_send_wqe() copies it instead of building a tag */
+
+		mlx5dr_send_ste(queue, &ste_attr);
+
+		/* Fencing is done only on the first WQE */
+		ste_attr.send_attr.fence = 0;
+	}
+}
+
+struct mlx5dr_send_engine_post_ctrl
+mlx5dr_send_engine_post_start(struct mlx5dr_send_engine *queue)
+{	/* Begin building a post: returns a ctrl bound to ring 0 with no WQEBBs requested yet */
+	struct mlx5dr_send_engine_post_ctrl ctrl;
+
+	ctrl.queue = queue;
+	ctrl.send_ring = &queue->send_ring[0]; // TODO: Change when send rings > 1
+	ctrl.num_wqebbs = 0;
+
+	return ctrl;
+}
+
+void mlx5dr_send_engine_post_req_wqe(struct mlx5dr_send_engine_post_ctrl *ctrl,
+				     char **buf, size_t *len)
+{	/* Return in *buf / *len the next WQEBB of the post being built and count it in ctrl */
+	struct mlx5dr_send_ring_sq *send_sq = &ctrl->send_ring->send_sq;
+	unsigned int idx;
+
+	idx = (send_sq->cur_post + ctrl->num_wqebbs) & send_sq->buf_mask; /* cur_post itself moves only in post_end */
+
+	*buf = send_sq->buf + (idx << MLX5_SEND_WQE_SHIFT);
+	*len = MLX5_SEND_WQE_BB;
+
+	if (!ctrl->num_wqebbs) { /* first WQEBB: skip the control segment, which post_end fills */
+		*buf += sizeof(struct mlx5dr_wqe_ctrl_seg);
+		*len -= sizeof(struct mlx5dr_wqe_ctrl_seg);
+	}
+
+	ctrl->num_wqebbs++;
+}
+
+static void mlx5dr_send_engine_post_ring(struct mlx5dr_send_ring_sq *sq,
+					 struct mlx5dv_devx_uar *uar,
+					 struct mlx5dr_wqe_ctrl_seg *wqe_ctrl)
+{	/* Ring the doorbell: publish cur_post in the doorbell record, then write to the UAR */
+	rte_compiler_barrier();
+	sq->db[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->cur_post);
+
+	rte_wmb(); /* doorbell record store is ordered before the UAR store */
+	mlx5dr_uar_write64_relaxed(*((uint64_t *)wqe_ctrl), uar->reg_addr); /* first 8 bytes of the control segment */
+	rte_wmb();
+}
+
+static void
+mlx5dr_send_wqe_set_tag(struct mlx5dr_wqe_gta_data_seg_ste *wqe_data,
+			struct mlx5dr_rule_match_tag *tag,
+			bool is_jumbo)
+{	/* Fill the data segment from a match tag; a jumbo tag is copied starting at the action field */
+	if (is_jumbo) {
+		/* Clear previous possibly dirty control */
+		memset(wqe_data, 0, MLX5DR_STE_CTRL_SZ);
+		memcpy(wqe_data->action, tag->jumbo, MLX5DR_JUMBO_TAG_SZ);
+	} else {
+		/* Clear previous possibly dirty control and actions */
+		memset(wqe_data, 0, MLX5DR_STE_CTRL_SZ + MLX5DR_ACTIONS_SZ);
+		memcpy(wqe_data->tag, tag->match, MLX5DR_MATCH_TAG_SZ);
+	}
+}
+
+void mlx5dr_send_engine_post_end(struct mlx5dr_send_engine_post_ctrl *ctrl,
+				 struct mlx5dr_send_engine_post_attr *attr)
+{	/* Finish a post: fill the control segment, record bookkeeping, advance cur_post, optionally ring */
+	struct mlx5dr_wqe_ctrl_seg *wqe_ctrl;
+	struct mlx5dr_send_ring_sq *sq;
+	uint32_t flags = 0;
+	unsigned idx;
+
+	sq = &ctrl->send_ring->send_sq;
+	idx = sq->cur_post & sq->buf_mask;
+	sq->last_idx = idx; /* read back by mlx5dr_send_engine_flush_queue() */
+
+	wqe_ctrl = (void *)(sq->buf + (idx << MLX5_SEND_WQE_SHIFT));
+
+	wqe_ctrl->opmod_idx_opcode =
+		rte_cpu_to_be_32((attr->opmod << 24) |
+				 ((sq->cur_post & 0xffff) << 8) |
+				 attr->opcode);
+	wqe_ctrl->qpn_ds = rte_cpu_to_be_32((attr->len + sizeof(struct mlx5dr_wqe_ctrl_seg)) / 16 | /* size in 16-byte units */
+			       sq->sqn << 8);
+	wqe_ctrl->imm = rte_cpu_to_be_32(attr->id);
+
+	flags |= attr->notify_hw ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
+	flags |= attr->fence ? MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE : 0;
+	wqe_ctrl->flags = rte_cpu_to_be_32(flags);
+
+	sq->wr_priv[idx].id = attr->id; /* wr_priv is indexed by the WQE's first WQEBB */
+	sq->wr_priv[idx].retry_id = attr->retry_id;
+
+	sq->wr_priv[idx].rule = attr->rule;
+	sq->wr_priv[idx].user_data = attr->user_data;
+	sq->wr_priv[idx].num_wqebbs = ctrl->num_wqebbs;
+
+	if (attr->rule) { /* rule-bound WQE: counted as pending until its completion is processed */
+		sq->wr_priv[idx].rule->pending_wqes++;
+		sq->wr_priv[idx].used_id = attr->used_id;
+	}
+
+	sq->cur_post += ctrl->num_wqebbs;
+
+	if (attr->notify_hw)
+		mlx5dr_send_engine_post_ring(sq, ctrl->queue->uar, wqe_ctrl);
+}
+
+static
+void mlx5dr_send_wqe(struct mlx5dr_send_engine *queue,
+		     struct mlx5dr_send_engine_post_attr *send_attr,
+		     struct mlx5dr_wqe_gta_ctrl_seg *send_wqe_ctrl,
+		     void *send_wqe_data,
+		     void *send_wqe_tag,
+		     bool is_jumbo,
+		     uint8_t gta_opcode,
+		     uint32_t direct_index)
+{	/* Build one two-WQEBB GTA WQE (ctrl WQEBB + data WQEBB) and post it */
+	struct mlx5dr_wqe_gta_data_seg_ste *wqe_data;
+	struct mlx5dr_wqe_gta_ctrl_seg *wqe_ctrl;
+	struct mlx5dr_send_engine_post_ctrl ctrl;
+	size_t wqe_len; /* written by post_req_wqe, not read here */
+
+	ctrl = mlx5dr_send_engine_post_start(queue);
+	mlx5dr_send_engine_post_req_wqe(&ctrl, (void *)&wqe_ctrl, &wqe_len);
+	mlx5dr_send_engine_post_req_wqe(&ctrl, (void *)&wqe_data, &wqe_len);
+
+	wqe_ctrl->op_dirix = htobe32(gta_opcode << 28 | direct_index);
+	memcpy(wqe_ctrl->stc_ix, send_wqe_ctrl->stc_ix, sizeof(send_wqe_ctrl->stc_ix));
+
+	if (send_wqe_data) /* ready-made data segment takes precedence over the tag */
+		memcpy(wqe_data, send_wqe_data, sizeof(*wqe_data));
+	else
+		mlx5dr_send_wqe_set_tag(wqe_data, send_wqe_tag, is_jumbo);
+
+	mlx5dr_send_engine_post_end(&ctrl, send_attr);
+}
+
+void mlx5dr_send_ste(struct mlx5dr_send_engine *queue,
+		     struct mlx5dr_send_ste_attr *ste_attr)
+{	/* Post the STE to rtc_1 first and then rtc_0, for each of them that is non-zero */
+	struct mlx5dr_send_engine_post_attr *send_attr = &ste_attr->send_attr;
+	uint8_t notify_hw = send_attr->notify_hw; /* saved: send_attr is modified per WQE below */
+	uint8_t fence = send_attr->fence;
+
+	if (ste_attr->rtc_1) {
+		send_attr->id = ste_attr->rtc_1;
+		send_attr->used_id = ste_attr->used_id_rtc_1;
+		send_attr->retry_id = ste_attr->retry_rtc_1;
+		send_attr->fence = fence;
+		send_attr->notify_hw = notify_hw && !ste_attr->rtc_0; /* defer notify to the rtc_0 WQE when it follows */
+		mlx5dr_send_wqe(queue, send_attr,
+				ste_attr->wqe_ctrl,
+				ste_attr->wqe_data,
+				ste_attr->wqe_tag,
+				ste_attr->wqe_tag_is_jumbo,
+				ste_attr->gta_opcode,
+				ste_attr->direct_index);
+	}
+
+	if (ste_attr->rtc_0) {
+		send_attr->id = ste_attr->rtc_0;
+		send_attr->used_id = ste_attr->used_id_rtc_0;
+		send_attr->retry_id = ste_attr->retry_rtc_0;
+		send_attr->fence = fence && !ste_attr->rtc_1; /* fence already applied on the rtc_1 WQE */
+		send_attr->notify_hw = notify_hw;
+		mlx5dr_send_wqe(queue, send_attr,
+				ste_attr->wqe_ctrl,
+				ste_attr->wqe_data,
+				ste_attr->wqe_tag,
+				ste_attr->wqe_tag_is_jumbo,
+				ste_attr->gta_opcode,
+				ste_attr->direct_index);
+	}
+
+	/* Restore to original requested values */
+	send_attr->notify_hw = notify_hw;
+	send_attr->fence = fence;
+}
+
+static void mlx5dr_send_engine_retry_post_send(struct mlx5dr_send_engine *queue,
+					       struct mlx5dr_send_ring_priv *priv,
+					       uint16_t wqe_cnt)
+{	/* Re-post the failed WQE found at SQ index wqe_cnt, this time targeting priv->retry_id */
+	struct mlx5dr_send_engine_post_attr send_attr = {0}; /* retry_id stays 0: the re-posted WQE is not retried again */
+	struct mlx5dr_wqe_gta_data_seg_ste *wqe_data;
+	struct mlx5dr_wqe_gta_ctrl_seg *wqe_ctrl;
+	struct mlx5dr_send_engine_post_ctrl ctrl;
+	struct mlx5dr_send_ring_sq *send_sq;
+	unsigned int idx;
+	size_t wqe_len;
+	char *p;
+
+	send_attr.rule = priv->rule;
+	send_attr.opcode = MLX5DR_WQE_OPCODE_TBL_ACCESS;
+	send_attr.opmod = MLX5DR_WQE_GTA_OPMOD_STE;
+	send_attr.len = MLX5_SEND_WQE_BB * 2 - sizeof(struct mlx5dr_wqe_ctrl_seg); /* post_end adds the control segment back */
+	send_attr.notify_hw = 1;
+	send_attr.fence = 0;
+	send_attr.user_data = priv->user_data;
+	send_attr.id = priv->retry_id;
+	send_attr.used_id = priv->used_id;
+
+	ctrl = mlx5dr_send_engine_post_start(queue);
+	mlx5dr_send_engine_post_req_wqe(&ctrl, (void *)&wqe_ctrl, &wqe_len);
+	mlx5dr_send_engine_post_req_wqe(&ctrl, (void *)&wqe_data, &wqe_len);
+
+	send_sq = &ctrl.send_ring->send_sq;
+	idx = wqe_cnt & send_sq->buf_mask; /* first WQEBB of the failed WQE */
+	p = send_sq->buf + (idx << MLX5_SEND_WQE_SHIFT);
+
+	/* Copy old gta ctrl */
+	memcpy(wqe_ctrl, p + sizeof(struct mlx5dr_wqe_ctrl_seg),
+	       MLX5_SEND_WQE_BB - sizeof(struct mlx5dr_wqe_ctrl_seg));
+
+	idx = (wqe_cnt + 1) & send_sq->buf_mask; /* second WQEBB of the failed WQE */
+	p = send_sq->buf + (idx << MLX5_SEND_WQE_SHIFT);
+
+	/* Copy old gta data */
+	memcpy(wqe_data, p, MLX5_SEND_WQE_BB);
+
+	mlx5dr_send_engine_post_end(&ctrl, &send_attr);
+}
+
+void mlx5dr_send_engine_flush_queue(struct mlx5dr_send_engine *queue)
+{	/* Set CQ_UPDATE on the last posted WQE of ring 0 and ring the doorbell */
+	struct mlx5dr_send_ring_sq *sq = &queue->send_ring[0].send_sq;
+	struct mlx5dr_wqe_ctrl_seg *wqe_ctrl;
+
+	wqe_ctrl = (void *)(sq->buf + (sq->last_idx << MLX5_SEND_WQE_SHIFT)); /* last_idx is recorded by post_end */
+
+	wqe_ctrl->flags |= rte_cpu_to_be_32(MLX5_WQE_CTRL_CQ_UPDATE);
+
+	mlx5dr_send_engine_post_ring(sq, queue->uar, wqe_ctrl);
+}
+
+static void mlx5dr_send_engine_update_rule(struct mlx5dr_send_engine *queue,
+					   struct mlx5dr_send_ring_priv *priv,
+					   uint16_t wqe_cnt,
+					   enum rte_flow_op_status *status)
+{	/* Account one completed WQE of a rule; *status is rewritten only on the rule's last pending WQE */
+	priv->rule->pending_wqes--;
+
+	if (*status == RTE_FLOW_OP_ERROR) {
+		if (priv->retry_id) { /* a retry target exists: re-post (which re-increments pending_wqes) */
+			mlx5dr_send_engine_retry_post_send(queue, priv, wqe_cnt);
+			return;
+		}
+		/* Some part of the rule failed */
+		priv->rule->status = MLX5DR_RULE_STATUS_FAILING;
+		*priv->used_id = 0;
+	} else {
+		*priv->used_id = priv->id; /* record the id the WQE was successfully posted with */
+	}
+
+	/* Update rule status for the last completion */
+	if (!priv->rule->pending_wqes) {
+		if (unlikely(priv->rule->status == MLX5DR_RULE_STATUS_FAILING)) {
+			/* Rule completely failed and doesn't require cleanup */
+			if (!priv->rule->rtc_0 && !priv->rule->rtc_1)
+				priv->rule->status = MLX5DR_RULE_STATUS_FAILED;
+
+			*status = RTE_FLOW_OP_ERROR;
+		} else {
+			/* Increase the status, this only works on good flow as the enum
+			 * is arranged this way: creating -> created -> deleting -> deleted
+			 */
+			priv->rule->status++;
+			*status = RTE_FLOW_OP_SUCCESS;
+			/* Rule was deleted now we can safely release action STEs */
+			if (priv->rule->status == MLX5DR_RULE_STATUS_DELETED)
+				mlx5dr_rule_free_action_ste_idx(priv->rule);
+		}
+	}
+}
+
+static void mlx5dr_send_engine_update(struct mlx5dr_send_engine *queue,
+				      struct mlx5_cqe64 *cqe,
+				      struct mlx5dr_send_ring_priv *priv,
+				      struct rte_flow_op_result res[],
+				      int64_t *i,
+				      uint32_t res_nb,
+				      uint16_t wqe_cnt)
+{	/* Process one completed WQE and report it in res[] or, when res[] is full, in queue->completed */
+	enum rte_flow_op_status status;
+
+	if (!cqe || (likely(rte_be_to_cpu_32(cqe->byte_cnt) >> 31 == 0) && /* a NULL cqe is treated as success */
+	    likely(mlx5dv_get_cqe_opcode(cqe) == MLX5_CQE_REQ))) {
+		status = RTE_FLOW_OP_SUCCESS;
+	} else {
+		status = RTE_FLOW_OP_ERROR;
+	}
+
+	if (priv->user_data) { /* WQEs without user_data produce no result entry */
+		if (priv->rule) {
+			mlx5dr_send_engine_update_rule(queue, priv, wqe_cnt, &status);
+			/* Completion is provided on the last rule WQE */
+			if (priv->rule->pending_wqes)
+				return;
+		}
+
+		if (*i < res_nb) {
+			res[*i].user_data = priv->user_data;
+			res[*i].status = status;
+			(*i)++;
+			mlx5dr_send_engine_dec_rule(queue);
+		} else { /* caller's array is full: keep the completion for mlx5dr_send_engine_poll_list() */
+			mlx5dr_send_engine_gen_comp(queue, priv->user_data, status);
+		}
+	}
+}
+
+static void mlx5dr_send_engine_poll_cq(struct mlx5dr_send_engine *queue,
+				       struct mlx5dr_send_ring *send_ring,
+				       struct rte_flow_op_result res[],
+				       int64_t *i,
+				       uint32_t res_nb)
+{	/* Handle at most one CQE of the ring's CQ, completing every WQE up to the one it reports */
+	struct mlx5dr_send_ring_cq *cq = &send_ring->send_cq;
+	struct mlx5dr_send_ring_sq *sq = &send_ring->send_sq;
+	uint32_t cq_idx = cq->cons_index & (cq->ncqe_mask);
+	struct mlx5dr_send_ring_priv *priv;
+	struct mlx5_cqe64 *cqe;
+	uint32_t offset_cqe64;
+	uint8_t cqe_opcode;
+	uint8_t cqe_owner;
+	uint16_t wqe_cnt;
+	uint8_t sw_own;
+
+	offset_cqe64 = RTE_CACHE_LINE_SIZE - sizeof(struct mlx5_cqe64); /* the cqe64 is read from the tail of a cache-line span */
+	cqe = (void *)(cq->buf + (cq_idx << cq->cqe_log_sz) + offset_cqe64);
+
+	sw_own = (cq->cons_index & cq->ncqe) ? 1 : 0; /* owner value expected for the current cons_index */
+	cqe_opcode = mlx5dv_get_cqe_opcode(cqe);
+	cqe_owner = mlx5dv_get_cqe_owner(cqe);
+
+	if (cqe_opcode == MLX5_CQE_INVALID ||
+	    cqe_owner != sw_own)
+		return; /* no new CQE to consume */
+
+	if (unlikely(mlx5dv_get_cqe_opcode(cqe) != MLX5_CQE_REQ))
+		queue->err = true; /* never cleared in this file; reported by mlx5dr_send_engine_err() */
+
+	rte_io_rmb();
+
+	wqe_cnt = be16toh(cqe->wqe_counter) & sq->buf_mask;
+
+	while (cq->poll_wqe != wqe_cnt) { /* earlier WQEs have no CQE of their own: completed with cqe == NULL */
+		priv = &sq->wr_priv[cq->poll_wqe];
+		mlx5dr_send_engine_update(queue, NULL, priv, res, i, res_nb, 0);
+		cq->poll_wqe = (cq->poll_wqe + priv->num_wqebbs) & sq->buf_mask;
+	}
+
+	priv = &sq->wr_priv[wqe_cnt];
+	cq->poll_wqe = (wqe_cnt + priv->num_wqebbs) & sq->buf_mask;
+	mlx5dr_send_engine_update(queue, cqe, priv, res, i, res_nb, wqe_cnt);
+	cq->cons_index++;
+}
+
+static void mlx5dr_send_engine_poll_cqs(struct mlx5dr_send_engine *queue,
+					struct rte_flow_op_result res[],
+					int64_t *polled,
+					uint32_t res_nb)
+{	/* Poll each send ring's CQ once, then publish its consumer index in the CQ doorbell record */
+	int j;
+
+	for (j = 0; j < MLX5DR_NUM_SEND_RINGS; j++) {
+		mlx5dr_send_engine_poll_cq(queue, &queue->send_ring[j],
+					   res, polled, res_nb);
+
+		*queue->send_ring[j].send_cq.db = htobe32(queue->send_ring[j].send_cq.cons_index & 0xffffff); /* low 24 bits only */
+	}
+}
+
+static void mlx5dr_send_engine_poll_list(struct mlx5dr_send_engine *queue,
+					 struct rte_flow_op_result res[],
+					 int64_t *polled,
+					 uint32_t res_nb)
+{	/* Move completions stored by mlx5dr_send_engine_gen_comp() into res[] until it is full */
+	struct mlx5dr_completed_poll *comp = &queue->completed;
+
+	while (comp->ci != comp->pi) { /* ci == pi means the list is empty */
+		if (*polled < res_nb) {
+			res[*polled].status =
+				comp->entries[comp->ci].status;
+			res[*polled].user_data =
+				comp->entries[comp->ci].user_data;
+			(*polled)++;
+			comp->ci = (comp->ci + 1) & comp->mask;
+			mlx5dr_send_engine_dec_rule(queue);
+                } else {
+			return;
+		}
+	}
+}
+
+static int mlx5dr_send_engine_poll(struct mlx5dr_send_engine *queue,
+				   struct rte_flow_op_result res[],
+				   uint32_t res_nb)
+{	/* Fill res[] first from the stored completion list, then from the CQs; returns the entry count */
+	int64_t polled = 0;
+
+	mlx5dr_send_engine_poll_list(queue, res, &polled, res_nb);
+
+	if (polled >= res_nb) /* res[] already full: do not touch the CQs */
+		return polled;
+
+	mlx5dr_send_engine_poll_cqs(queue, res, &polled, res_nb);
+
+	return polled;
+}
+
+int mlx5dr_send_queue_poll(struct mlx5dr_context *ctx,
+			   uint16_t queue_id,
+			   struct rte_flow_op_result res[],
+			   uint32_t res_nb)
+{	/* Poll ctx->send_queue[queue_id]; queue_id is not range-checked here */
+	return mlx5dr_send_engine_poll(&ctx->send_queue[queue_id],
+				       res, res_nb);
+}
+
+static int mlx5dr_send_ring_create_sq_obj(struct mlx5dr_context *ctx,
+					  struct mlx5dr_send_engine *queue,
+					  struct mlx5dr_send_ring_sq *sq,
+					  struct mlx5dr_send_ring_cq *cq,
+					  size_t log_wq_sz)
+{	/* Create the SQ object and move it to ready; returns 0 or a non-zero error */
+	struct mlx5dr_cmd_sq_create_attr attr = {0};
+	int err;
+
+	attr.cqn = cq->cqn;
+	attr.pdn = ctx->pd_num;
+	attr.page_id = queue->uar->page_id;
+	attr.dbr_id = sq->db_umem->umem_id; /* both umems must already be registered by the caller */
+	attr.wq_id = sq->buf_umem->umem_id;
+	attr.log_wq_sz = log_wq_sz;
+
+	sq->obj = mlx5dr_cmd_sq_create(ctx->ibv_ctx, &attr);
+	if (!sq->obj)
+		return rte_errno; /* presumably set by mlx5dr_cmd_sq_create() - TODO confirm */
+
+	sq->sqn = sq->obj->id;
+
+	err = mlx5dr_cmd_sq_modify_rdy(sq->obj);
+	if (err)
+		goto free_sq;
+
+	return 0;
+
+free_sq:
+	mlx5dr_cmd_destroy_obj(sq->obj);
+
+	return err;
+}
+
+static inline unsigned long align(unsigned long val, unsigned long align)
+{	/* Round val up to a multiple of align; the mask form is only correct for power-of-two align */
+        return (val + align - 1) & ~(align - 1);
+}
+
+static int mlx5dr_send_ring_open_sq(struct mlx5dr_context *ctx,
+				    struct mlx5dr_send_engine *queue,
+				    struct mlx5dr_send_ring_sq *sq,
+				    struct mlx5dr_send_ring_cq *cq)
+{	/* Allocate and register the SQ buffer and doorbell, create the SQ object and bookkeeping arrays */
+	size_t sq_log_buf_sz;
+	size_t buf_aligned;
+	size_t sq_buf_sz;
+	size_t buf_sz;
+	int err;
+
+	buf_sz = queue->num_entries * MAX_WQES_PER_RULE; /* SQ depth in WQEBBs */
+	sq_log_buf_sz = log2above(buf_sz);
+	sq_buf_sz = 1 << (sq_log_buf_sz + log2above(MLX5_SEND_WQE_BB)); /* SQ buffer size in bytes */
+	sq->reg_addr = queue->uar->reg_addr;
+
+	buf_aligned = align(sq_buf_sz, sysconf(_SC_PAGESIZE));
+	err = posix_memalign((void **)&sq->buf, sysconf(_SC_PAGESIZE), buf_aligned);
+	if (err) {
+		rte_errno = ENOMEM;
+		return err;
+	}
+	memset(sq->buf, 0, buf_aligned);
+
+	err = posix_memalign((void **)&sq->db, 8, 8); /* 8-byte doorbell record; not zeroed here */
+	if (err)
+		goto free_buf;
+
+	sq->buf_umem = mlx5_glue->devx_umem_reg(ctx->ibv_ctx, sq->buf, sq_buf_sz, 0);
+
+	if (!sq->buf_umem) {
+		err = errno;
+		goto free_db;
+	}
+
+	sq->db_umem = mlx5_glue->devx_umem_reg(ctx->ibv_ctx, sq->db, 8, 0);
+	if (!sq->db_umem) {
+		err = errno;
+		goto free_buf_umem;
+	}
+
+	err = mlx5dr_send_ring_create_sq_obj(ctx, queue, sq, cq, sq_log_buf_sz);
+
+	if (err)
+		goto free_db_umem;
+
+	sq->wr_priv = simple_malloc(sizeof(*sq->wr_priv) * buf_sz); /* one bookkeeping entry per WQEBB index */
+	if (!sq->wr_priv) {
+		err = ENOMEM;
+		goto destroy_sq_obj;
+	}
+
+	sq->dep_wqe = simple_calloc(queue->num_entries ,sizeof(*sq->dep_wqe));
+	if (!sq->dep_wqe) {
+		err = ENOMEM;
+		goto destroy_wr_priv;
+	}
+
+	sq->buf_mask = buf_sz - 1; /* NOTE(review): buf_mask is uint16_t, so buf_sz above 65536 would truncate */
+
+	return 0;
+
+destroy_wr_priv: /* unwind in reverse order of acquisition */
+	simple_free(sq->wr_priv);
+destroy_sq_obj:
+	mlx5dr_cmd_destroy_obj(sq->obj);
+free_db_umem:
+	mlx5_glue->devx_umem_dereg(sq->db_umem);
+free_buf_umem:
+	mlx5_glue->devx_umem_dereg(sq->buf_umem);
+free_db:
+	free(sq->db);
+free_buf:
+	free(sq->buf);
+	rte_errno = err;
+	return err;
+}
+
+static void mlx5dr_send_ring_close_sq(struct mlx5dr_send_ring_sq *sq)
+{	/* Release everything mlx5dr_send_ring_open_sq() acquired */
+	simple_free(sq->dep_wqe);
+	mlx5dr_cmd_destroy_obj(sq->obj); /* SQ object is destroyed before its umems are deregistered */
+	mlx5_glue->devx_umem_dereg(sq->db_umem);
+	mlx5_glue->devx_umem_dereg(sq->buf_umem);
+	simple_free(sq->wr_priv);
+	free(sq->db);
+	free(sq->buf);
+}
+
+static int mlx5dr_send_ring_open_cq(struct mlx5dr_context *ctx,
+				    struct mlx5dr_send_engine *queue,
+				    struct mlx5dr_send_ring_cq *cq)
+{	/* Create a CQ of queue->num_entries and cache its layout in cq; returns 0 or an errno value */
+	struct mlx5dv_cq mlx5_cq = {0};
+	struct mlx5dv_obj obj;
+	struct ibv_cq *ibv_cq;
+	size_t cq_size;
+	int err;
+
+	cq_size = queue->num_entries;
+	ibv_cq = mlx5_glue->create_cq(ctx->ibv_ctx, cq_size, NULL, NULL, 0);
+	if (!ibv_cq) {
+		DR_LOG(ERR, "Failed to create CQ");
+		rte_errno = errno;
+		return rte_errno;
+	}
+
+	obj.cq.in = ibv_cq;
+	obj.cq.out = &mlx5_cq; /* dv_init_obj fills mlx5_cq with the CQ's buffer, doorbell and sizes */
+	err = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ);
+	if (err) {
+		err = errno;
+		goto close_cq;
+	}
+
+	cq->buf = mlx5_cq.buf;
+	cq->db = mlx5_cq.dbrec;
+	cq->ncqe = mlx5_cq.cqe_cnt;
+	if (cq->ncqe < queue->num_entries) /* logged only; setup continues */
+		DR_LOG(ERR, "%s - (ncqe: %u queue_num_entries: %u) Bug?!",
+			__func__,
+			cq->ncqe,
+			queue->num_entries); /* TODO - Debug test */
+	cq->cqe_sz = mlx5_cq.cqe_size;
+	cq->cqe_log_sz = log2above(cq->cqe_sz);
+	cq->ncqe_mask = cq->ncqe - 1;
+	cq->buf_sz = cq->cqe_sz * cq->ncqe;
+	cq->cqn = mlx5_cq.cqn;
+	cq->ibv_cq = ibv_cq;
+
+	return 0;
+
+close_cq:
+	mlx5_glue->destroy_cq(ibv_cq);
+	rte_errno = err;
+	return err;
+}
+
+static void mlx5dr_send_ring_close_cq(struct mlx5dr_send_ring_cq *cq)
+{	/* Destroy the CQ created by mlx5dr_send_ring_open_cq() */
+	mlx5_glue->destroy_cq(cq->ibv_cq);
+}
+
+static void mlx5dr_send_ring_close(struct mlx5dr_send_ring *ring)
+{	/* Close the SQ first, then the CQ (reverse of mlx5dr_send_ring_open()) */
+	mlx5dr_send_ring_close_sq(&ring->send_sq);
+	mlx5dr_send_ring_close_cq(&ring->send_cq);
+}
+
+static int mlx5dr_send_ring_open(struct mlx5dr_context *ctx,
+				 struct mlx5dr_send_engine *queue,
+				 struct mlx5dr_send_ring *ring)
+{	/* Open the ring's CQ and then its SQ; returns 0 or the error of the failing step */
+	int err;
+
+	err = mlx5dr_send_ring_open_cq(ctx, queue, &ring->send_cq);
+	if (err)
+		return err;
+
+	err = mlx5dr_send_ring_open_sq(ctx, queue, &ring->send_sq, &ring->send_cq); /* the SQ is created against this CQ's cqn */
+	if (err)
+		goto close_cq;
+
+	return err; /* err is 0 here */
+
+close_cq:
+	mlx5dr_send_ring_close_cq(&ring->send_cq);
+
+	return err;
+}
+
+static void __mlx5dr_send_rings_close(struct mlx5dr_send_engine *queue,
+				      uint16_t i)
+{	/* Close rings i-1 down to 0 */
+	while (i--)
+		mlx5dr_send_ring_close(&queue->send_ring[i]);
+}
+
+static void mlx5dr_send_rings_close(struct mlx5dr_send_engine *queue)
+{	/* Close all queue->rings rings */
+	__mlx5dr_send_rings_close(queue, queue->rings);
+}
+
+static int mlx5dr_send_rings_open(struct mlx5dr_context *ctx,
+				  struct mlx5dr_send_engine *queue)
+{	/* Open queue->rings rings; on failure the rings already opened are closed again */
+	uint16_t i;
+	int err;
+
+	for (i = 0; i < queue->rings; i++) {
+		err = mlx5dr_send_ring_open(ctx, queue, &queue->send_ring[i]);
+		if (err)
+			goto free_rings;
+	}
+
+	return 0;
+
+free_rings:
+	__mlx5dr_send_rings_close(queue, i); /* i is the count of rings opened successfully */
+
+	return err;
+}
+
+void mlx5dr_send_queue_close(struct mlx5dr_send_engine *queue)
+{	/* Release the rings, the completion list and the UAR of one send queue */
+	mlx5dr_send_rings_close(queue);
+	simple_free(queue->completed.entries);
+	mlx5_glue->devx_free_uar(queue->uar);
+}
+
+int mlx5dr_send_queue_open(struct mlx5dr_context *ctx,
+			   struct mlx5dr_send_engine *queue,
+			   uint16_t queue_size)
+{	/* Allocate a UAR, the completion list and the send rings of one queue; returns 0 or rte_errno */
+	struct mlx5dv_devx_uar *uar;
+	int err;
+
+#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
+	uar = mlx5_glue->devx_alloc_uar(ctx->ibv_ctx, MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC);
+	if (!uar) {
+		rte_errno = errno;
+		return rte_errno;
+	}
+#else /* built without MLX5DV_UAR_ALLOC_TYPE_NC: always fails with ENOTSUP */
+	uar = NULL;
+	rte_errno = ENOTSUP;
+	return rte_errno;
+#endif
+
+	queue->uar = uar;
+	queue->rings = MLX5DR_NUM_SEND_RINGS;
+	queue->num_entries = roundup_pow_of_two(queue_size); /* TODO */
+	queue->used_entries = 0;
+	queue->th_entries = queue->num_entries; /* threshold tested by mlx5dr_send_engine_full() */
+
+	queue->completed.entries = simple_calloc(queue->num_entries,
+						 sizeof(queue->completed.entries[0]));
+	if (!queue->completed.entries) {
+		rte_errno = ENOMEM;
+		goto free_uar;
+	}
+	queue->completed.pi = 0;
+	queue->completed.ci = 0;
+	queue->completed.mask = queue->num_entries - 1;
+
+	err = mlx5dr_send_rings_open(ctx, queue);
+	if (err)
+		goto free_completed_entries;
+
+	return 0;
+
+free_completed_entries:
+	simple_free(queue->completed.entries);
+free_uar:
+	mlx5_glue->devx_free_uar(uar);
+	return rte_errno; /* err itself is not returned; relies on rte_errno having been set by the failing step */
+}
+
+static void __mlx5dr_send_queues_close(struct mlx5dr_context *ctx, uint16_t queues)
+{	/* Close send queues queues-1 down to 0; the send_queue array itself is not freed */
+	struct mlx5dr_send_engine *queue;
+
+	while (queues--) {
+		queue = &ctx->send_queue[queues];
+
+		mlx5dr_send_queue_close(queue);
+	}
+}
+
+void mlx5dr_send_queues_close(struct mlx5dr_context *ctx)
+{	/* Close all ctx->queues send queues and free the array that holds them */
+	__mlx5dr_send_queues_close(ctx, ctx->queues);
+	simple_free(ctx->send_queue);
+}
+
+int mlx5dr_send_queues_open(struct mlx5dr_context *ctx,
+			    uint16_t queues,
+			    uint16_t queue_size)
+{	/* Allocate and open queues + 1 send queues, each of queue_size; returns 0 or an error */
+	uint32_t i;
+	int err = 0;
+
+	/* TODO: For now there is a 1:1 queue:ring mapping
+	 * add middle logic layer if it ever changes.
+	 */
+	/* open one extra queue for control path */
+	ctx->queues = queues + 1;
+
+	ctx->send_queue = simple_calloc(ctx->queues, sizeof(*ctx->send_queue));
+	if (!ctx->send_queue) {
+		rte_errno = ENOMEM;
+		return rte_errno;
+	}
+
+	for (i = 0; i < ctx->queues; i++) {
+		err = mlx5dr_send_queue_open(ctx, &ctx->send_queue[i], queue_size);
+		if (err)
+			goto close_send_queues;
+	}
+
+	return 0;
+
+close_send_queues:
+	 __mlx5dr_send_queues_close(ctx, i); /* closes only the i queues that were opened */
+
+	simple_free(ctx->send_queue); /* NOTE(review): ctx->send_queue and ctx->queues are left unchanged after the free */
+
+	return err;
+}
+
+int mlx5dr_send_queue_action(struct mlx5dr_context *ctx,
+			     uint16_t queue_id,
+			     uint32_t actions)
+{	/* Apply a queue action; only DRAIN is supported. Returns 0, or -EINVAL with rte_errno = EINVAL */
+	struct mlx5dr_send_ring_sq *send_sq;
+	struct mlx5dr_send_engine *queue;
+
+	queue = &ctx->send_queue[queue_id];
+	send_sq = &queue->send_ring->send_sq;
+
+	if (actions == MLX5DR_SEND_QUEUE_ACTION_DRAIN) {
+		if (send_sq->head_dep_idx != send_sq->tail_dep_idx)
+			/* Send dependent WQEs to drain the queue */
+			mlx5dr_send_all_dep_wqe(queue);
+		else
+			/* Signal on the last posted WQE */
+			mlx5dr_send_engine_flush_queue(queue);
+	} else {
+		rte_errno = EINVAL; /* positive errno, like every other rte_errno store in this file */
+		return -rte_errno; /* return value stays -EINVAL for existing callers */
+	}
+
+	return 0;
+}
diff --git a/drivers/net/mlx5/hws/mlx5dr_send.h b/drivers/net/mlx5/hws/mlx5dr_send.h
new file mode 100644
index 0000000000..1897a1df9e
--- /dev/null
+++ b/drivers/net/mlx5/hws/mlx5dr_send.h
@@ -0,0 +1,273 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2022 NVIDIA Corporation & Affiliates
+ */
+
+#ifndef MLX5DR_SEND_H_
+#define MLX5DR_SEND_H_
+
+#define MLX5DR_NUM_SEND_RINGS 1 /* length of mlx5dr_send_engine.send_ring[] */
+
+/* As a single operation requires at least two WQEBBS this means a maximum of 16
+ * such operations per rule. The SQ depth is num_entries * MAX_WQES_PER_RULE WQEBBs.
+ */
+#define MAX_WQES_PER_RULE 32
+
+/* WQE Control segment. */
+struct mlx5dr_wqe_ctrl_seg {
+        __be32 opmod_idx_opcode; /* opmod << 24 | (WQE index & 0xffff) << 8 | opcode */
+        __be32 qpn_ds; /* sqn << 8 | WQE size in 16-byte units */
+        __be32 flags; /* MLX5_WQE_CTRL_CQ_UPDATE and/or MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE */
+        __be32 imm; /* set to the post attr id */
+};
+
+enum mlx5dr_wqe_opcode { /* value for mlx5dr_send_engine_post_attr.opcode */
+	MLX5DR_WQE_OPCODE_TBL_ACCESS = 0x2c,
+};
+
+enum mlx5dr_wqe_opmod { /* NOTE(review): same values as enum mlx5dr_wqe_gta_opmod; confirm which one callers should use */
+	MLX5DR_WQE_OPMOD_GTA_STE = 0,
+	MLX5DR_WQE_OPMOD_GTA_MOD_ARG = 1,
+};
+
+enum mlx5dr_wqe_gta_opcode { /* placed in bits 28 and up of mlx5dr_wqe_gta_ctrl_seg.op_dirix */
+	MLX5DR_WQE_GTA_OP_ACTIVATE = 0,
+	MLX5DR_WQE_GTA_OP_DEACTIVATE = 1,
+};
+
+enum mlx5dr_wqe_gta_opmod { /* value for mlx5dr_send_engine_post_attr.opmod */
+	MLX5DR_WQE_GTA_OPMOD_STE = 0,
+	MLX5DR_WQE_GTA_OPMOD_MOD_ARG = 1,
+};
+
+enum mlx5dr_wqe_gta_sz { /* segment sizes in bytes */
+	MLX5DR_WQE_SZ_GTA_CTRL = 48, /* matches the twelve __be32 of struct mlx5dr_wqe_gta_ctrl_seg */
+	MLX5DR_WQE_SZ_GTA_DATA = 64, /* matches the sixteen __be32 of struct mlx5dr_wqe_gta_data_seg_ste */
+};
+
+struct mlx5dr_wqe_gta_ctrl_seg { /* GTA control segment: follows mlx5dr_wqe_ctrl_seg in the first WQEBB */
+	__be32 op_dirix; /* gta_opcode << 28 | direct_index */
+	__be32 stc_ix[5];
+	__be32 rsvd0[6];
+};
+
+struct mlx5dr_wqe_gta_data_seg_ste { /* GTA data segment for STE writes: occupies the second WQEBB */
+	__be32 rsvd0_ctr_id;
+	__be32 rsvd1[4];
+	__be32 action[3]; /* jumbo tags are copied starting here */
+	__be32 tag[8]; /* regular match tag */
+};
+
+struct mlx5dr_wqe_gta_data_seg_arg { /* alternative data segment layout; see the union in struct mlx5dr_wqe_gta */
+	__be32 action_args[8];
+};
+
+struct mlx5dr_wqe_gta { /* GTA control segment followed by one of the two data layouts */
+	struct mlx5dr_wqe_gta_ctrl_seg gta_ctrl;
+	union {
+		struct mlx5dr_wqe_gta_data_seg_ste seg_ste;
+		struct mlx5dr_wqe_gta_data_seg_arg seg_arg;
+	};
+};
+
+struct mlx5dr_send_ring_cq { /* software view of one completion queue */
+        uint8_t *buf; /* CQE buffer reported by dv_init_obj */
+        uint32_t cons_index; /* free-running consumer index; low 24 bits are written to *db */
+	uint32_t ncqe_mask; /* ncqe - 1 */
+	uint32_t buf_sz; /* cqe_sz * ncqe */
+        uint32_t ncqe; /* number of CQEs reported by dv_init_obj */
+	uint32_t cqe_log_sz; /* log2above(cqe_sz) */
+        __be32 *db; /* CQ doorbell record */
+	uint16_t poll_wqe; /* SQ index of the next WQE expected to complete */
+        struct ibv_cq *ibv_cq;
+        uint32_t cqn;
+        uint32_t cqe_sz;
+};
+
+struct mlx5dr_send_ring_priv { /* per-WQE bookkeeping, stored at the index of the WQE's first WQEBB */
+	struct mlx5dr_rule *rule; /* may be NULL; when set, the rule's pending_wqes tracks this WQE */
+	void *user_data; /* NULL means no result is reported for this WQE */
+	uint32_t num_wqebbs; /* WQEBBs occupied by this WQE */
+	uint32_t id; /* id the WQE was posted with */
+	uint32_t retry_id; /* non-zero: id to re-post to if the WQE completes in error */
+	uint32_t *used_id; /* receives id on success, 0 on final failure */
+};
+
+struct mlx5dr_send_ring_dep_wqe { /* a WQE prepared in advance and posted later by mlx5dr_send_all_dep_wqe() */
+	struct mlx5dr_wqe_gta_ctrl_seg wqe_ctrl;
+	struct mlx5dr_wqe_gta_data_seg_ste wqe_data;
+	struct mlx5dr_rule *rule;
+	uint32_t rtc_0; /* zero means this RTC is skipped by mlx5dr_send_ste() */
+	uint32_t rtc_1;
+	uint32_t retry_rtc_0;
+	uint32_t retry_rtc_1;
+	void *user_data;
+};
+
+struct mlx5dr_send_ring_sq { /* software view of one send queue */
+	char *buf; /* WQE buffer, addressed in WQEBB units */
+	uint32_t sqn;
+	__be32 *db; /* doorbell record; entry MLX5_SND_DBR receives cur_post */
+	void *reg_addr; /* copy of the queue UAR's reg_addr */
+	uint16_t cur_post; /* free-running index of the next WQEBB to post */
+	uint16_t buf_mask; /* SQ depth in WQEBBs minus one */
+	struct mlx5dr_send_ring_priv *wr_priv; /* one entry per WQEBB index */
+	unsigned last_idx; /* buffer index of the most recently posted WQE */
+	struct mlx5dr_send_ring_dep_wqe *dep_wqe; /* ring of num_entries dependent WQEs */
+	unsigned head_dep_idx; /* free-running; next dep_wqe slot to hand out */
+	unsigned tail_dep_idx; /* free-running; next dep_wqe slot to post */
+	struct mlx5dr_devx_obj *obj;
+	struct mlx5dv_devx_umem *buf_umem;
+	struct mlx5dv_devx_umem *db_umem;
+};
+
+struct mlx5dr_send_ring { /* one SQ together with the CQ it completes on */
+	struct mlx5dr_send_ring_cq send_cq;
+	struct mlx5dr_send_ring_sq send_sq;
+};
+
+struct mlx5dr_completed_poll_entry { /* one stored completion waiting to be handed to the caller */
+	void *user_data;
+	enum rte_flow_op_status status;
+};
+
+struct mlx5dr_completed_poll { /* ring of completions that did not fit in a caller's result array */
+	struct mlx5dr_completed_poll_entry *entries;
+	uint16_t ci; /* consumer index, advanced by mlx5dr_send_engine_poll_list() */
+	uint16_t pi; /* producer index, advanced by mlx5dr_send_engine_gen_comp() */
+	uint16_t mask; /* num_entries - 1 */
+};
+
+struct mlx5dr_send_engine { /* one send queue: its rings, UAR and completion state */
+	struct mlx5dr_send_ring send_ring[MLX5DR_NUM_SEND_RINGS]; /* For now 1:1 mapping */
+	struct mlx5dv_devx_uar *uar; /* Uar is shared between rings of a queue */
+	struct mlx5dr_completed_poll completed;
+	uint16_t used_entries; /* changed by mlx5dr_send_engine_inc_rule() / _dec_rule() */
+	uint16_t th_entries; /* mlx5dr_send_engine_full() reports true once used_entries reaches this */
+	uint16_t rings;
+	uint16_t num_entries; /* queue size rounded up to a power of two */
+	bool err; /* set when a CQE with an opcode other than MLX5_CQE_REQ is seen */
+} __rte_cache_aligned;
+
+struct mlx5dr_send_engine_post_ctrl { /* state of one post between post_start and post_end */
+	struct mlx5dr_send_engine *queue;
+	struct mlx5dr_send_ring *send_ring;
+	size_t num_wqebbs; /* WQEBBs requested so far through post_req_wqe */
+};
+
+struct mlx5dr_send_engine_post_attr { /* per-WQE attributes consumed by mlx5dr_send_engine_post_end() */
+	uint8_t opcode;
+	uint8_t opmod;
+	uint8_t notify_hw; /* non-zero: set CQ_UPDATE and ring the doorbell */
+	uint8_t fence; /* non-zero: set the initiator small fence flag */
+	size_t len; /* WQE length in bytes, excluding the control segment */
+	struct mlx5dr_rule *rule; /* optional; NULL for WQEs not tied to a rule */
+	uint32_t id; /* written to the control segment's imm field */
+	uint32_t retry_id;
+	uint32_t *used_id; /* stored only when rule is set */
+	void *user_data;
+};
+
+struct mlx5dr_send_ste_attr { /* input of mlx5dr_send_ste() */
+	/* rtc / retry_rtc / used_id_rtc override send_attr */
+	uint32_t rtc_0; /* zero: no WQE is posted for this RTC */
+	uint32_t rtc_1; /* when non-zero, posted before rtc_0 */
+	uint32_t retry_rtc_0;
+	uint32_t retry_rtc_1;
+	uint32_t *used_id_rtc_0;
+	uint32_t *used_id_rtc_1;
+	bool wqe_tag_is_jumbo;
+	uint8_t gta_opcode;
+	uint32_t direct_index;
+	struct mlx5dr_send_engine_post_attr send_attr;
+	struct mlx5dr_rule_match_tag *wqe_tag; /* used only when wqe_data is NULL */
+	struct mlx5dr_wqe_gta_ctrl_seg *wqe_ctrl; /* source of the stc_ix values */
+	struct mlx5dr_wqe_gta_data_seg_ste *wqe_data; /* when non-NULL, copied as the data segment */
+};
+
+/**
+ * Provide safe 64bit store operation to mlx5 UAR region for both 32bit and
+ * 64bit architectures.
+ *
+ * @param val
+ *   value to write in CPU endian format.
+ * @param addr
+ *   Address to write to.
+ *
+ * No lock is taken; without RTE_ARCH_64 the halves are stored low first with rte_io_wmb() between.
+ */
+static __rte_always_inline void
+mlx5dr_uar_write64_relaxed(uint64_t val, void *addr)
+{
+#ifdef RTE_ARCH_64
+	*(uint64_t *)addr = val;
+#else /* !RTE_ARCH_64 */
+	*(uint32_t *)addr = val;
+	rte_io_wmb();
+	*((uint32_t *)addr + 1) = val >> 32;
+#endif
+}
+
+struct mlx5dr_send_ring_dep_wqe *
+mlx5dr_send_add_new_dep_wqe(struct mlx5dr_send_engine *queue);
+
+void mlx5dr_send_abort_new_dep_wqe(struct mlx5dr_send_engine *queue);
+
+void mlx5dr_send_all_dep_wqe(struct mlx5dr_send_engine *queue);
+
+void mlx5dr_send_queue_close(struct mlx5dr_send_engine *queue);
+
+int mlx5dr_send_queue_open(struct mlx5dr_context *ctx,
+			   struct mlx5dr_send_engine *queue,
+			   uint16_t queue_size);
+
+void mlx5dr_send_queues_close(struct mlx5dr_context *ctx);
+
+int mlx5dr_send_queues_open(struct mlx5dr_context *ctx,
+			    uint16_t queues,
+			    uint16_t queue_size);
+
+struct mlx5dr_send_engine_post_ctrl
+mlx5dr_send_engine_post_start(struct mlx5dr_send_engine *queue);
+void mlx5dr_send_engine_post_req_wqe(struct mlx5dr_send_engine_post_ctrl *ctrl,
+				     char **buf, size_t *len);
+void mlx5dr_send_engine_post_end(struct mlx5dr_send_engine_post_ctrl *ctrl,
+				 struct mlx5dr_send_engine_post_attr *attr);
+
+void mlx5dr_send_ste(struct mlx5dr_send_engine *queue,
+		     struct mlx5dr_send_ste_attr *ste_attr);
+
+void mlx5dr_send_engine_flush_queue(struct mlx5dr_send_engine *queue);
+
+static inline bool mlx5dr_send_engine_full(struct mlx5dr_send_engine *queue)
+{	/* True once used_entries has reached the th_entries threshold */
+	return queue->used_entries >= queue->th_entries;
+}
+
+static inline void mlx5dr_send_engine_inc_rule(struct mlx5dr_send_engine *queue)
+{	/* Count one more in-flight entry; no bound check is done here */
+	queue->used_entries++;
+}
+
+static inline void mlx5dr_send_engine_dec_rule(struct mlx5dr_send_engine *queue)
+{	/* Count one entry as reported to the caller; no underflow check is done here */
+	queue->used_entries--;
+}
+
+static inline void mlx5dr_send_engine_gen_comp(struct mlx5dr_send_engine *queue,
+					       void *user_data,
+					       int comp_status)
+{	/* Append a completion to queue->completed at pi and advance pi */
+	struct mlx5dr_completed_poll *comp = &queue->completed;
+
+	comp->entries[comp->pi].status = comp_status;
+	comp->entries[comp->pi].user_data = user_data;
+	/* NOTE(review): pi is advanced without checking it against ci */
+	comp->pi = (comp->pi + 1) & comp->mask;
+}
+
+static inline bool mlx5dr_send_engine_err(struct mlx5dr_send_engine *queue)
+{	/* Report the queue's err flag */
+	return queue->err;
+}
+
+#endif