[v3,05/14] net/mlx5: support Tx hairpin queues

Message ID 1571130263-120863-6-git-send-email-orika@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Series: add hairpin feature

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Ori Kam Oct. 15, 2019, 9:04 a.m. UTC
This commit adds support for creating Tx hairpin queues. A hairpin
queue is a queue that is created using DevX and used only by the HW.
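
For reference, an application binds such a queue through the generic
ethdev hairpin API added earlier in this series. A minimal sketch,
assuming the v3 field names used in this patch (peer_n, peers[].port,
peers[].queue) and hypothetical port_id/nb_desc values:

	/* Bind Tx hairpin queue 1 to Rx queue 1 of the same port. */
	struct rte_eth_hairpin_conf conf = {
		.peer_n = 1, /* this PMD accepts exactly one peer */
		.peers[0] = { .port = port_id, .queue = 1 },
	};
	ret = rte_eth_tx_hairpin_queue_setup(port_id, 1, nb_desc, &conf);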

Signed-off-by: Ori Kam <orika@mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>

---
 drivers/net/mlx5/mlx5.c           |  36 +++++-
 drivers/net/mlx5/mlx5.h           |  46 ++++++++
 drivers/net/mlx5/mlx5_devx_cmds.c | 186 ++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_prm.h       | 118 +++++++++++++++++++
 drivers/net/mlx5/mlx5_rxtx.h      |  18 ++-
 drivers/net/mlx5/mlx5_trigger.c   |  10 +-
 drivers/net/mlx5/mlx5_txq.c       | 230 +++++++++++++++++++++++++++++++++++---
 7 files changed, 620 insertions(+), 24 deletions(-)
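
At a glance, the DevX helpers added below compose on the control path
as follows. A condensed sketch (error handling elided; ctx, peer_rq_id
and peer_vhca_id are placeholders, and the peer binding with the
RST->RDY transition is performed by later patches in this series; per
the PRM, SQ state 0 is RST and 1 is RDY):

	struct mlx5_devx_tis_attr tis_attr = { 0 };
	struct mlx5_devx_create_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_sq_attr msq = { 0 };
	struct mlx5_devx_obj *td, *tis, *sq;

	td = mlx5_devx_cmd_create_td(ctx);	/* Transport domain. */
	tis_attr.transport_domain = td->id;
	tis = mlx5_devx_cmd_create_tis(ctx, &tis_attr);
	sq_attr.hairpin = 1;			/* HW-only queue, no SW WQ. */
	sq_attr.tis_lst_sz = 1;
	sq_attr.tis_num = tis->id;
	sq = mlx5_devx_cmd_create_sq(ctx, &sq_attr);
	msq.sq_state = 0;			/* Current state: RST. */
	msq.state = 1;				/* Target state: RDY. */
	msq.hairpin_peer_rq = peer_rq_id;	/* Peer RQ number. */
	msq.hairpin_peer_vhca = peer_vhca_id;	/* Peer vHCA id. */
	mlx5_devx_cmd_modify_sq(sq, &msq);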
  

Patch

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 2431a55..c53a9c6 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -325,6 +325,9 @@  struct mlx5_dev_spawn_data {
 	struct mlx5_ibv_shared *sh;
 	int err = 0;
 	uint32_t i;
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+	struct mlx5_devx_tis_attr tis_attr = { 0 };
+#endif
 
 	assert(spawn);
 	/* Secondary process should not create the shared context. */
@@ -389,10 +392,25 @@  struct mlx5_dev_spawn_data {
 		goto error;
 	}
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
-	err = mlx5_get_pdn(sh->pd, &sh->pdn);
-	if (err) {
-		DRV_LOG(ERR, "Fail to extract pdn from PD");
-		goto error;
+	if (sh->devx) {
+		err = mlx5_get_pdn(sh->pd, &sh->pdn);
+		if (err) {
+			DRV_LOG(ERR, "Fail to extract pdn from PD");
+			goto error;
+		}
+		sh->td = mlx5_devx_cmd_create_td(sh->ctx);
+		if (!sh->td) {
+			DRV_LOG(ERR, "TD allocation failure");
+			err = ENOMEM;
+			goto error;
+		}
+		tis_attr.transport_domain = sh->td->id;
+		sh->tis = mlx5_devx_cmd_create_tis(sh->ctx, &tis_attr);
+		if (!sh->tis) {
+			DRV_LOG(ERR, "TIS allocation failure");
+			err = ENOMEM;
+			goto error;
+		}
 	}
 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
 	/*
@@ -425,6 +443,10 @@  struct mlx5_dev_spawn_data {
 error:
 	pthread_mutex_unlock(&mlx5_ibv_list_mutex);
 	assert(sh);
+	if (sh->tis)
+		claim_zero(mlx5_devx_cmd_destroy(sh->tis));
+	if (sh->td)
+		claim_zero(mlx5_devx_cmd_destroy(sh->td));
 	if (sh->pd)
 		claim_zero(mlx5_glue->dealloc_pd(sh->pd));
 	if (sh->ctx)
@@ -485,6 +507,10 @@  struct mlx5_dev_spawn_data {
 	pthread_mutex_destroy(&sh->intr_mutex);
 	if (sh->pd)
 		claim_zero(mlx5_glue->dealloc_pd(sh->pd));
+	if (sh->tis)
+		claim_zero(mlx5_devx_cmd_destroy(sh->tis));
+	if (sh->td)
+		claim_zero(mlx5_devx_cmd_destroy(sh->td));
 	if (sh->ctx)
 		claim_zero(mlx5_glue->close_device(sh->ctx));
 	rte_free(sh);
@@ -976,6 +1002,7 @@  struct mlx5_dev_spawn_data {
 	.rx_queue_setup = mlx5_rx_queue_setup,
 	.rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup,
 	.tx_queue_setup = mlx5_tx_queue_setup,
+	.tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup,
 	.rx_queue_release = mlx5_rx_queue_release,
 	.tx_queue_release = mlx5_tx_queue_release,
 	.flow_ctrl_get = mlx5_dev_get_flow_ctrl,
@@ -1043,6 +1070,7 @@  struct mlx5_dev_spawn_data {
 	.rx_queue_setup = mlx5_rx_queue_setup,
 	.rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup,
 	.tx_queue_setup = mlx5_tx_queue_setup,
+	.tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup,
 	.rx_queue_release = mlx5_rx_queue_release,
 	.tx_queue_release = mlx5_tx_queue_release,
 	.flow_ctrl_get = mlx5_dev_get_flow_ctrl,
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 36cced9..7ea4950 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -350,6 +350,43 @@  struct mlx5_devx_rqt_attr {
 	uint32_t rq_list[];
 };
 
+/* TIS attributes structure. */
+struct mlx5_devx_tis_attr {
+	uint32_t strict_lag_tx_port_affinity:1;
+	uint32_t tls_en:1;
+	uint32_t lag_tx_port_affinity:4;
+	uint32_t prio:4;
+	uint32_t transport_domain:24;
+};
+
+/* SQ attributes structure, used by SQ create operation. */
+struct mlx5_devx_create_sq_attr {
+	uint32_t rlky:1;
+	uint32_t cd_master:1;
+	uint32_t fre:1;
+	uint32_t flush_in_error_en:1;
+	uint32_t allow_multi_pkt_send_wqe:1;
+	uint32_t min_wqe_inline_mode:3;
+	uint32_t state:4;
+	uint32_t reg_umr:1;
+	uint32_t allow_swp:1;
+	uint32_t hairpin:1;
+	uint32_t user_index:24;
+	uint32_t cqn:24;
+	uint32_t packet_pacing_rate_limit_index:16;
+	uint32_t tis_lst_sz:16;
+	uint32_t tis_num:24;
+	struct mlx5_devx_wq_attr wq_attr;
+};
+
+/* SQ attributes structure, used by SQ modify operation. */
+struct mlx5_devx_modify_sq_attr {
+	uint32_t sq_state:4;
+	uint32_t state:4;
+	uint32_t hairpin_peer_rq:24;
+	uint32_t hairpin_peer_vhca:16;
+};
+
 /**
  * Type of object being allocated.
  */
@@ -591,6 +628,8 @@  struct mlx5_ibv_shared {
 	struct rte_intr_handle intr_handle; /* Interrupt handler for device. */
 	struct rte_intr_handle intr_handle_devx; /* DEVX interrupt handler. */
 	struct mlx5dv_devx_cmd_comp *devx_comp; /* DEVX async comp obj. */
+	struct mlx5_devx_obj *tis; /* TIS object. */
+	struct mlx5_devx_obj *td; /* Transport domain. */
 	struct mlx5_ibv_shared_port port[]; /* per device port data array. */
 };
 
@@ -911,5 +950,12 @@  struct mlx5_devx_obj *mlx5_devx_cmd_create_tir(struct ibv_context *ctx,
 					struct mlx5_devx_tir_attr *tir_attr);
 struct mlx5_devx_obj *mlx5_devx_cmd_create_rqt(struct ibv_context *ctx,
 					struct mlx5_devx_rqt_attr *rqt_attr);
+struct mlx5_devx_obj *mlx5_devx_cmd_create_sq
+	(struct ibv_context *ctx, struct mlx5_devx_create_sq_attr *sq_attr);
+int mlx5_devx_cmd_modify_sq
+	(struct mlx5_devx_obj *sq, struct mlx5_devx_modify_sq_attr *sq_attr);
+struct mlx5_devx_obj *mlx5_devx_cmd_create_tis
+	(struct ibv_context *ctx, struct mlx5_devx_tis_attr *tis_attr);
+struct mlx5_devx_obj *mlx5_devx_cmd_create_td(struct ibv_context *ctx);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_devx_cmds.c b/drivers/net/mlx5/mlx5_devx_cmds.c
index b072c37..917bbf9 100644
--- a/drivers/net/mlx5/mlx5_devx_cmds.c
+++ b/drivers/net/mlx5/mlx5_devx_cmds.c
@@ -709,3 +709,189 @@  struct mlx5_devx_obj *
 	rqt->id = MLX5_GET(create_rqt_out, out, rqtn);
 	return rqt;
 }
+
+/**
+ * Create SQ using DevX API.
+ *
+ * @param[in] ctx
+ *   ibv_context returned from mlx5dv_open_device.
+ * @param [in] sq_attr
+ *   Pointer to SQ attributes structure,
+ *   used to fill the SQ context and its WQ
+ *   attributes in the CREATE_SQ command.
+ *
+ * @return
+ *   The DevX object created, NULL otherwise and rte_errno is set.
+ */
+struct mlx5_devx_obj *
+mlx5_devx_cmd_create_sq(struct ibv_context *ctx,
+			struct mlx5_devx_create_sq_attr *sq_attr)
+{
+	uint32_t in[MLX5_ST_SZ_DW(create_sq_in)] = {0};
+	uint32_t out[MLX5_ST_SZ_DW(create_sq_out)] = {0};
+	void *sq_ctx;
+	void *wq_ctx;
+	struct mlx5_devx_wq_attr *wq_attr;
+	struct mlx5_devx_obj *sq = NULL;
+
+	sq = rte_calloc(__func__, 1, sizeof(*sq), 0);
+	if (!sq) {
+		DRV_LOG(ERR, "Failed to allocate SQ data");
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ);
+	sq_ctx = MLX5_ADDR_OF(create_sq_in, in, ctx);
+	MLX5_SET(sqc, sq_ctx, rlky, sq_attr->rlky);
+	MLX5_SET(sqc, sq_ctx, cd_master, sq_attr->cd_master);
+	MLX5_SET(sqc, sq_ctx, fre, sq_attr->fre);
+	MLX5_SET(sqc, sq_ctx, flush_in_error_en, sq_attr->flush_in_error_en);
+	MLX5_SET(sqc, sq_ctx, allow_multi_pkt_send_wqe,
+		 sq_attr->allow_multi_pkt_send_wqe);
+	MLX5_SET(sqc, sq_ctx, min_wqe_inline_mode,
+		 sq_attr->min_wqe_inline_mode);
+	MLX5_SET(sqc, sq_ctx, state, sq_attr->state);
+	MLX5_SET(sqc, sq_ctx, reg_umr, sq_attr->reg_umr);
+	MLX5_SET(sqc, sq_ctx, allow_swp, sq_attr->allow_swp);
+	MLX5_SET(sqc, sq_ctx, hairpin, sq_attr->hairpin);
+	MLX5_SET(sqc, sq_ctx, user_index, sq_attr->user_index);
+	MLX5_SET(sqc, sq_ctx, cqn, sq_attr->cqn);
+	MLX5_SET(sqc, sq_ctx, packet_pacing_rate_limit_index,
+		 sq_attr->packet_pacing_rate_limit_index);
+	MLX5_SET(sqc, sq_ctx, tis_lst_sz, sq_attr->tis_lst_sz);
+	MLX5_SET(sqc, sq_ctx, tis_num_0, sq_attr->tis_num);
+	wq_ctx = MLX5_ADDR_OF(sqc, sq_ctx, wq);
+	wq_attr = &sq_attr->wq_attr;
+	devx_cmd_fill_wq_data(wq_ctx, wq_attr);
+	sq->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
+					     out, sizeof(out));
+	if (!sq->obj) {
+		DRV_LOG(ERR, "Failed to create SQ using DevX");
+		rte_errno = errno;
+		rte_free(sq);
+		return NULL;
+	}
+	sq->id = MLX5_GET(create_sq_out, out, sqn);
+	return sq;
+}
+
+/**
+ * Modify SQ using DevX API.
+ *
+ * @param[in] sq
+ *   Pointer to SQ object structure.
+ * @param [in] sq_attr
+ *   Pointer to SQ attributes structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_devx_cmd_modify_sq(struct mlx5_devx_obj *sq,
+			struct mlx5_devx_modify_sq_attr *sq_attr)
+{
+	uint32_t in[MLX5_ST_SZ_DW(modify_sq_in)] = {0};
+	uint32_t out[MLX5_ST_SZ_DW(modify_sq_out)] = {0};
+	void *sq_ctx;
+	int ret;
+
+	MLX5_SET(modify_sq_in, in, opcode, MLX5_CMD_OP_MODIFY_SQ);
+	MLX5_SET(modify_sq_in, in, sq_state, sq_attr->sq_state);
+	MLX5_SET(modify_sq_in, in, sqn, sq->id);
+	sq_ctx = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+	MLX5_SET(sqc, sq_ctx, state, sq_attr->state);
+	MLX5_SET(sqc, sq_ctx, hairpin_peer_rq, sq_attr->hairpin_peer_rq);
+	MLX5_SET(sqc, sq_ctx, hairpin_peer_vhca, sq_attr->hairpin_peer_vhca);
+	ret = mlx5_glue->devx_obj_modify(sq->obj, in, sizeof(in),
+					 out, sizeof(out));
+	if (ret) {
+		DRV_LOG(ERR, "Failed to modify SQ using DevX");
+		rte_errno = errno;
+		return -errno;
+	}
+	return ret;
+}
+
+/**
+ * Create TIS using DevX API.
+ *
+ * @param[in] ctx
+ *   ibv_context returned from mlx5dv_open_device.
+ * @param [in] tis_attr
+ *   Pointer to TIS attributes structure.
+ *
+ * @return
+ *   The DevX object created, NULL otherwise and rte_errno is set.
+ */
+struct mlx5_devx_obj *
+mlx5_devx_cmd_create_tis(struct ibv_context *ctx,
+			 struct mlx5_devx_tis_attr *tis_attr)
+{
+	uint32_t in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
+	uint32_t out[MLX5_ST_SZ_DW(create_tis_out)] = {0};
+	struct mlx5_devx_obj *tis = NULL;
+	void *tis_ctx;
+
+	tis = rte_calloc(__func__, 1, sizeof(*tis), 0);
+	if (!tis) {
+		DRV_LOG(ERR, "Failed to allocate TIS object");
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS);
+	tis_ctx = MLX5_ADDR_OF(create_tis_in, in, ctx);
+	MLX5_SET(tisc, tis_ctx, strict_lag_tx_port_affinity,
+		 tis_attr->strict_lag_tx_port_affinity);
+	MLX5_SET(tisc, tis_ctx, lag_tx_port_affinity,
+		 tis_attr->lag_tx_port_affinity);
+	MLX5_SET(tisc, tis_ctx, prio, tis_attr->prio);
+	MLX5_SET(tisc, tis_ctx, transport_domain,
+		 tis_attr->transport_domain);
+	tis->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
+					      out, sizeof(out));
+	if (!tis->obj) {
+		DRV_LOG(ERR, "Failed to create TIS using DevX");
+		rte_errno = errno;
+		rte_free(tis);
+		return NULL;
+	}
+	tis->id = MLX5_GET(create_tis_out, out, tisn);
+	return tis;
+}
+
+/**
+ * Create transport domain using DevX API.
+ *
+ * @param[in] ctx
+ *   ibv_context returned from mlx5dv_open_device.
+ *
+ * @return
+ *   The DevX object created, NULL otherwise and rte_errno is set.
+ */
+struct mlx5_devx_obj *
+mlx5_devx_cmd_create_td(struct ibv_context *ctx)
+{
+	uint32_t in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {0};
+	uint32_t out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0};
+	struct mlx5_devx_obj *td = NULL;
+
+	td = rte_calloc(__func__, 1, sizeof(*td), 0);
+	if (!td) {
+		DRV_LOG(ERR, "Failed to allocate TD object");
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	MLX5_SET(alloc_transport_domain_in, in, opcode,
+		 MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
+	td->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in),
+					     out, sizeof(out));
+	if (!td->obj) {
+		DRV_LOG(ERR, "Failed to create TIS using DevX");
+		rte_errno = errno;
+		rte_free(td);
+		return NULL;
+	}
+	td->id = MLX5_GET(alloc_transport_domain_out, out,
+			   transport_domain);
+	return td;
+}
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 3765df0..faa7996 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -666,9 +666,13 @@  enum {
 	MLX5_CMD_OP_QUERY_HCA_CAP = 0x100,
 	MLX5_CMD_OP_CREATE_MKEY = 0x200,
 	MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT = 0x754,
+	MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN = 0x816,
 	MLX5_CMD_OP_CREATE_TIR = 0x900,
+	MLX5_CMD_OP_CREATE_SQ = 0X904,
+	MLX5_CMD_OP_MODIFY_SQ = 0X905,
 	MLX5_CMD_OP_CREATE_RQ = 0x908,
 	MLX5_CMD_OP_MODIFY_RQ = 0x909,
+	MLX5_CMD_OP_CREATE_TIS = 0x912,
 	MLX5_CMD_OP_QUERY_TIS = 0x915,
 	MLX5_CMD_OP_CREATE_RQT = 0x916,
 	MLX5_CMD_OP_ALLOC_FLOW_COUNTER = 0x939,
@@ -1311,6 +1315,23 @@  struct mlx5_ifc_query_tis_in_bits {
 	u8 reserved_at_60[0x20];
 };
 
+struct mlx5_ifc_alloc_transport_domain_out_bits {
+	u8 status[0x8];
+	u8 reserved_at_8[0x18];
+	u8 syndrome[0x20];
+	u8 reserved_at_40[0x8];
+	u8 transport_domain[0x18];
+	u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_alloc_transport_domain_in_bits {
+	u8 opcode[0x10];
+	u8 reserved_at_10[0x10];
+	u8 reserved_at_20[0x10];
+	u8 op_mod[0x10];
+	u8 reserved_at_40[0x40];
+};
+
 enum {
 	MLX5_WQ_TYPE_LINKED_LIST                = 0x0,
 	MLX5_WQ_TYPE_CYCLIC                     = 0x1,
@@ -1427,6 +1448,24 @@  struct mlx5_ifc_modify_rq_out_bits {
 	u8 reserved_at_40[0x40];
 };
 
+struct mlx5_ifc_create_tis_out_bits {
+	u8 status[0x8];
+	u8 reserved_at_8[0x18];
+	u8 syndrome[0x20];
+	u8 reserved_at_40[0x8];
+	u8 tisn[0x18];
+	u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_create_tis_in_bits {
+	u8 opcode[0x10];
+	u8 uid[0x10];
+	u8 reserved_at_20[0x10];
+	u8 op_mod[0x10];
+	u8 reserved_at_40[0xc0];
+	struct mlx5_ifc_tisc_bits ctx;
+};
+
 enum {
 	MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_WQ_LWM = 1ULL << 0,
 	MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD = 1ULL << 1,
@@ -1572,6 +1611,85 @@  struct mlx5_ifc_create_rqt_in_bits {
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
+struct mlx5_ifc_sqc_bits {
+	u8 rlky[0x1];
+	u8 cd_master[0x1];
+	u8 fre[0x1];
+	u8 flush_in_error_en[0x1];
+	u8 allow_multi_pkt_send_wqe[0x1];
+	u8 min_wqe_inline_mode[0x3];
+	u8 state[0x4];
+	u8 reg_umr[0x1];
+	u8 allow_swp[0x1];
+	u8 hairpin[0x1];
+	u8 reserved_at_f[0x11];
+	u8 reserved_at_20[0x8];
+	u8 user_index[0x18];
+	u8 reserved_at_40[0x8];
+	u8 cqn[0x18];
+	u8 reserved_at_60[0x8];
+	u8 hairpin_peer_rq[0x18];
+	u8 reserved_at_80[0x10];
+	u8 hairpin_peer_vhca[0x10];
+	u8 reserved_at_a0[0x50];
+	u8 packet_pacing_rate_limit_index[0x10];
+	u8 tis_lst_sz[0x10];
+	u8 reserved_at_110[0x10];
+	u8 reserved_at_120[0x40];
+	u8 reserved_at_160[0x8];
+	u8 tis_num_0[0x18];
+	struct mlx5_ifc_wq_bits wq;
+};
+
+struct mlx5_ifc_query_sq_in_bits {
+	u8 opcode[0x10];
+	u8 reserved_at_10[0x10];
+	u8 reserved_at_20[0x10];
+	u8 op_mod[0x10];
+	u8 reserved_at_40[0x8];
+	u8 sqn[0x18];
+	u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_modify_sq_out_bits {
+	u8 status[0x8];
+	u8 reserved_at_8[0x18];
+	u8 syndrome[0x20];
+	u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_modify_sq_in_bits {
+	u8 opcode[0x10];
+	u8 uid[0x10];
+	u8 reserved_at_20[0x10];
+	u8 op_mod[0x10];
+	u8 sq_state[0x4];
+	u8 reserved_at_44[0x4];
+	u8 sqn[0x18];
+	u8 reserved_at_60[0x20];
+	u8 modify_bitmask[0x40];
+	u8 reserved_at_c0[0x40];
+	struct mlx5_ifc_sqc_bits ctx;
+};
+
+struct mlx5_ifc_create_sq_out_bits {
+	u8 status[0x8];
+	u8 reserved_at_8[0x18];
+	u8 syndrome[0x20];
+	u8 reserved_at_40[0x8];
+	u8 sqn[0x18];
+	u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_create_sq_in_bits {
+	u8 opcode[0x10];
+	u8 uid[0x10];
+	u8 reserved_at_20[0x10];
+	u8 op_mod[0x10];
+	u8 reserved_at_40[0xc0];
+	struct mlx5_ifc_sqc_bits ctx;
+};
+
 /* CQE format mask. */
 #define MLX5E_CQE_FORMAT_MASK 0xc
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 12f9bfb..271b648 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -324,14 +324,18 @@  struct mlx5_txq_obj {
 	LIST_ENTRY(mlx5_txq_obj) next; /* Pointer to the next element. */
 	rte_atomic32_t refcnt; /* Reference counter. */
 	struct mlx5_txq_ctrl *txq_ctrl; /* Pointer to the control queue. */
-	enum mlx5_rxq_obj_type type; /* The txq object type. */
+	enum mlx5_txq_obj_type type; /* The txq object type. */
 	RTE_STD_C11
 	union {
 		struct {
 			struct ibv_cq *cq; /* Completion Queue. */
 			struct ibv_qp *qp; /* Queue Pair. */
 		};
-		struct mlx5_devx_obj *sq; /* DevX object for Sx queue. */
+		struct {
+			struct mlx5_devx_obj *sq;
+			/* DevX object for Tx queue. */
+			struct mlx5_devx_obj *tis; /* The TIS object. */
+		};
 	};
 };
 
@@ -348,6 +352,7 @@  struct mlx5_txq_ctrl {
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 	void *bf_reg; /* BlueFlame register from Verbs. */
 	uint16_t dump_file_n; /* Number of dump files. */
+	struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */
 	struct mlx5_txq_data txq; /* Data path structure. */
 	/* Must be the last field in the structure, contains elts[]. */
 };
@@ -410,15 +415,22 @@  struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev,
 
 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			unsigned int socket, const struct rte_eth_txconf *conf);
+int mlx5_tx_hairpin_queue_setup
+	(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
+	 const struct rte_eth_hairpin_conf *hairpin_conf);
 void mlx5_tx_queue_release(void *dpdk_txq);
 int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
-struct mlx5_txq_obj *mlx5_txq_obj_new(struct rte_eth_dev *dev, uint16_t idx);
+struct mlx5_txq_obj *mlx5_txq_obj_new(struct rte_eth_dev *dev, uint16_t idx,
+				      enum mlx5_txq_obj_type type);
 struct mlx5_txq_obj *mlx5_txq_obj_get(struct rte_eth_dev *dev, uint16_t idx);
 int mlx5_txq_obj_release(struct mlx5_txq_obj *txq_ibv);
 int mlx5_txq_obj_verify(struct rte_eth_dev *dev);
 struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx,
 				   uint16_t desc, unsigned int socket,
 				   const struct rte_eth_txconf *conf);
+struct mlx5_txq_ctrl *mlx5_txq_hairpin_new
+	(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
+	 const struct rte_eth_hairpin_conf *hairpin_conf);
 struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx);
 int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx);
 int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx);
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 50c4df5..3ec86c4 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -51,8 +51,14 @@ 
 
 		if (!txq_ctrl)
 			continue;
-		txq_alloc_elts(txq_ctrl);
-		txq_ctrl->obj = mlx5_txq_obj_new(dev, i);
+		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN) {
+			txq_ctrl->obj = mlx5_txq_obj_new
+				(dev, i, MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN);
+		} else {
+			txq_alloc_elts(txq_ctrl);
+			txq_ctrl->obj = mlx5_txq_obj_new
+				(dev, i, MLX5_TXQ_OBJ_TYPE_IBV);
+		}
 		if (!txq_ctrl->obj) {
 			rte_errno = ENOMEM;
 			goto error;
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index a6e2563..f9bfe31 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -136,30 +136,22 @@ 
 }
 
 /**
- * DPDK callback to configure a TX queue.
+ * Tx queue presetup checks.
  *
  * @param dev
  *   Pointer to Ethernet device structure.
  * @param idx
- *   TX queue index.
+ *   Tx queue index.
  * @param desc
  *   Number of descriptors to configure in queue.
- * @param socket
- *   NUMA socket on which memory must be allocated.
- * @param[in] conf
- *   Thresholds parameters.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-int
-mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
-		    unsigned int socket, const struct rte_eth_txconf *conf)
+static int
+mlx5_tx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
-	struct mlx5_txq_ctrl *txq_ctrl =
-		container_of(txq, struct mlx5_txq_ctrl, txq);
 
 	if (desc <= MLX5_TX_COMP_THRESH) {
 		DRV_LOG(WARNING,
@@ -191,6 +183,38 @@ 
 		return -rte_errno;
 	}
 	mlx5_txq_release(dev, idx);
+	return 0;
+}
+/**
+ * DPDK callback to configure a TX queue.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param idx
+ *   TX queue index.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param socket
+ *   NUMA socket on which memory must be allocated.
+ * @param[in] conf
+ *   Thresholds parameters.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
+		    unsigned int socket, const struct rte_eth_txconf *conf)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
+	struct mlx5_txq_ctrl *txq_ctrl =
+		container_of(txq, struct mlx5_txq_ctrl, txq);
+	int res;
+
+	res = mlx5_tx_queue_pre_setup(dev, idx, desc);
+	if (res)
+		return res;
 	txq_ctrl = mlx5_txq_new(dev, idx, desc, socket, conf);
 	if (!txq_ctrl) {
 		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
@@ -204,6 +228,57 @@ 
 }
 
 /**
+ * DPDK callback to configure a TX hairpin queue.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param idx
+ *   TX queue index.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param[in] hairpin_conf
+ *   The hairpin binding configuration.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
+			    uint16_t desc,
+			    const struct rte_eth_hairpin_conf *hairpin_conf)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
+	struct mlx5_txq_ctrl *txq_ctrl =
+		container_of(txq, struct mlx5_txq_ctrl, txq);
+	int res;
+
+	res = mlx5_tx_queue_pre_setup(dev, idx, desc);
+	if (res)
+		return res;
+	if (hairpin_conf->peer_n != 1 ||
+	    hairpin_conf->peers[0].port != dev->data->port_id ||
+	    hairpin_conf->peers[0].queue >= priv->rxqs_n) {
+		DRV_LOG(ERR, "port %u unable to setup hairpin queue index %u "
+			" invalid hairpind configuration", dev->data->port_id,
+			idx);
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	txq_ctrl = mlx5_txq_hairpin_new(dev, idx, desc, hairpin_conf);
+	if (!txq_ctrl) {
+		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
+			dev->data->port_id, idx);
+		return -rte_errno;
+	}
+	DRV_LOG(DEBUG, "port %u adding Tx queue %u to list",
+		dev->data->port_id, idx);
+	(*priv->txqs)[idx] = &txq_ctrl->txq;
+	txq_ctrl->type = MLX5_TXQ_TYPE_HAIRPIN;
+	return 0;
+}
+
+/**
  * DPDK callback to release a TX queue.
  *
  * @param dpdk_txq
@@ -246,6 +321,8 @@ 
 	const size_t page_size = sysconf(_SC_PAGESIZE);
 #endif
 
+	if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
+		return;
 	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
 	assert(ppriv);
 	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
@@ -282,6 +359,8 @@ 
 	uintptr_t offset;
 	const size_t page_size = sysconf(_SC_PAGESIZE);
 
+	if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
+		return 0;
 	assert(ppriv);
 	/*
 	 * As rdma-core, UARs are mapped in size of OS page
@@ -316,6 +395,8 @@ 
 	const size_t page_size = sysconf(_SC_PAGESIZE);
 	void *addr;
 
+	if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
+		return;
 	addr = ppriv->uar_table[txq_ctrl->txq.idx];
 	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
 }
@@ -346,6 +427,8 @@ 
 			continue;
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
+		if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
+			continue;
 		assert(txq->idx == (uint16_t)i);
 		ret = txq_uar_init_secondary(txq_ctrl, fd);
 		if (ret)
@@ -365,18 +448,87 @@ 
 }
 
 /**
+ * Create the Tx hairpin queue object.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param idx
+ *   Queue index in DPDK Tx queue array.
+ *
+ * @return
+ *   The hairpin DevX object initialised, NULL otherwise and rte_errno is set.
+ */
+static struct mlx5_txq_obj *
+mlx5_txq_obj_hairpin_new(struct rte_eth_dev *dev, uint16_t idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
+	struct mlx5_txq_ctrl *txq_ctrl =
+		container_of(txq_data, struct mlx5_txq_ctrl, txq);
+	struct mlx5_devx_create_sq_attr attr = { 0 };
+	struct mlx5_txq_obj *tmpl = NULL;
+	int ret = 0;
+
+	assert(txq_data);
+	assert(!txq_ctrl->obj);
+	tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
+				 txq_ctrl->socket);
+	if (!tmpl) {
+		DRV_LOG(ERR,
+			"port %u Tx queue %u cannot allocate memory resources",
+			dev->data->port_id, txq_data->idx);
+		rte_errno = ENOMEM;
+		goto error;
+	}
+	tmpl->type = MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN;
+	tmpl->txq_ctrl = txq_ctrl;
+	attr.hairpin = 1;
+	attr.tis_lst_sz = 1;
+	/* Workaround for hairpin startup. */
+	attr.wq_attr.log_hairpin_num_packets = log2above(32);
+	/* Workaround for packets larger than 1KB. */
+	attr.wq_attr.log_hairpin_data_sz =
+			priv->config.hca_attr.log_max_hairpin_wq_data_sz;
+	attr.tis_num = priv->sh->tis->id;
+	tmpl->sq = mlx5_devx_cmd_create_sq(priv->sh->ctx, &attr);
+	if (!tmpl->sq) {
+		DRV_LOG(ERR,
+			"port %u tx hairpin queue %u can't create sq object",
+			dev->data->port_id, idx);
+		rte_errno = errno;
+		goto error;
+	}
+	DRV_LOG(DEBUG, "port %u sxq %u updated with %p", dev->data->port_id,
+		idx, (void *)&tmpl);
+	rte_atomic32_inc(&tmpl->refcnt);
+	LIST_INSERT_HEAD(&priv->txqsobj, tmpl, next);
+	return tmpl;
+error:
+	ret = rte_errno; /* Save rte_errno before cleanup. */
+	if (tmpl->tis)
+		mlx5_devx_cmd_destroy(tmpl->tis);
+	if (tmpl->sq)
+		mlx5_devx_cmd_destroy(tmpl->sq);
+	rte_errno = ret; /* Restore rte_errno. */
+	return NULL;
+}
+
+/**
  * Create the Tx queue Verbs object.
  *
  * @param dev
  *   Pointer to Ethernet device.
  * @param idx
  *   Queue index in DPDK Tx queue array.
+ * @param type
+ *   Type of the Tx queue object to create.
  *
  * @return
  *   The Verbs object initialised, NULL otherwise and rte_errno is set.
  */
 struct mlx5_txq_obj *
-mlx5_txq_obj_new(struct rte_eth_dev *dev, uint16_t idx)
+mlx5_txq_obj_new(struct rte_eth_dev *dev, uint16_t idx,
+		 enum mlx5_txq_obj_type type)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
@@ -396,6 +548,8 @@  struct mlx5_txq_obj *
 	const int desc = 1 << txq_data->elts_n;
 	int ret = 0;
 
+	if (type == MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN)
+		return mlx5_txq_obj_hairpin_new(dev, idx);
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 	/* If using DevX, need additional mask to read tisn value. */
 	if (priv->config.devx && !priv->sh->tdn)
@@ -643,8 +797,13 @@  struct mlx5_txq_obj *
 {
 	assert(txq_obj);
 	if (rte_atomic32_dec_and_test(&txq_obj->refcnt)) {
-		claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
-		claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
+		if (txq_obj->type == MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN) {
+			if (txq_obj->tis)
+				claim_zero(mlx5_devx_cmd_destroy(txq_obj->tis));
+		} else {
+			claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
+			claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
+		}
 		LIST_REMOVE(txq_obj, next);
 		rte_free(txq_obj);
 		return 0;
@@ -1100,6 +1259,7 @@  struct mlx5_txq_ctrl *
 		goto error;
 	}
 	rte_atomic32_inc(&tmpl->refcnt);
+	tmpl->type = MLX5_TXQ_TYPE_STANDARD;
 	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
 	return tmpl;
 error:
@@ -1108,6 +1268,46 @@  struct mlx5_txq_ctrl *
 }
 
 /**
+ * Create a DPDK Tx hairpin queue.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param idx
+ *   TX queue index.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param hairpin_conf
+ *   The hairpin configuration.
+ *
+ * @return
+ *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
+ */
+struct mlx5_txq_ctrl *
+mlx5_txq_hairpin_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
+		     const struct rte_eth_hairpin_conf *hairpin_conf)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_txq_ctrl *tmpl;
+
+	tmpl = rte_calloc_socket("TXQ", 1,
+				 sizeof(*tmpl), 0, SOCKET_ID_ANY);
+	if (!tmpl) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	tmpl->priv = priv;
+	tmpl->socket = SOCKET_ID_ANY;
+	tmpl->txq.elts_n = log2above(desc);
+	tmpl->txq.port_id = dev->data->port_id;
+	tmpl->txq.idx = idx;
+	tmpl->hairpin_conf = *hairpin_conf;
+	tmpl->type = MLX5_TXQ_TYPE_HAIRPIN;
+	rte_atomic32_inc(&tmpl->refcnt);
+	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
+	return tmpl;
+}
+
+/**
  * Get a Tx queue.
  *
  * @param dev