[v3,10/11] compress/mlx5: support 32-bit systems

Message ID 1611142175-409485-11-git-send-email-matan@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: akhil goyal
Series: add mlx5 compress PMD

Checks

Context        Check    Description
ci/checkpatch  success  coding style OK

Commit Message

Matan Azrad Jan. 20, 2021, 11:29 a.m. UTC
In order to support 32-bit systems, the 8B doorbell write must be done
as two 4B stores.

The order of the stores is important, which is why a memory barrier is
needed between them.

The doorbell address is shared between all the queues, which is why a
lock must wrap the two stores.

Signed-off-by: Matan Azrad <matan@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/compress/mlx5/mlx5_compress.c | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)
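
Before the patch itself, here is a minimal standalone sketch of the pattern
being introduced: on 64-bit architectures the 8B doorbell value goes out as a
single store, while on 32-bit architectures it is split into two 4B stores,
ordered by a barrier and serialized by a lock. The struct db_ctx and
doorbell_write64() names are illustrative only (the real implementation is
mlx5_compress_uar_write() below); rte_spinlock_*() and rte_io_wmb() are the
DPDK primitives the patch actually uses.

#include <stdint.h>

#include <rte_atomic.h>
#include <rte_spinlock.h>

/* Illustrative context; the real fields live in struct mlx5_compress_priv. */
struct db_ctx {
	volatile uint64_t *uar_addr; /* 8B doorbell register in the UAR page. */
	rte_spinlock_t lock; /* Serializes the two 4B stores on 32-bit. */
};

static inline void
doorbell_write64(struct db_ctx *db, uint64_t val)
{
#ifdef RTE_ARCH_64
	/* A 64-bit CPU issues the 8B doorbell write as a single store. */
	*db->uar_addr = val;
#else
	/*
	 * A 32-bit CPU cannot, so the value is written as two 4B stores.
	 * The barrier keeps the low word ahead of the high word, and the
	 * lock prevents another queue from interleaving its own halves.
	 */
	rte_spinlock_lock(&db->lock);
	*(volatile uint32_t *)db->uar_addr = (uint32_t)val;
	rte_io_wmb();
	*((volatile uint32_t *)db->uar_addr + 1) = (uint32_t)(val >> 32);
	rte_spinlock_unlock(&db->lock);
#endif
}
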
Patch

diff --git a/drivers/compress/mlx5/mlx5_compress.c b/drivers/compress/mlx5/mlx5_compress.c
index 7a43d9e..f7ef913 100644
--- a/drivers/compress/mlx5/mlx5_compress.c
+++ b/drivers/compress/mlx5/mlx5_compress.c
@@ -50,6 +50,10 @@  struct mlx5_compress_priv {
 	LIST_HEAD(xform_list, mlx5_compress_xform) xform_list;
 	rte_spinlock_t xform_sl;
 	struct mlx5_mr_share_cache mr_scache; /* Global shared MR cache. */
+	volatile uint64_t *uar_addr;
+#ifndef RTE_ARCH_64
+	rte_spinlock_t uar32_sl;
+#endif /* RTE_ARCH_64 */
 };
 
 struct mlx5_compress_qp {
@@ -57,7 +61,6 @@  struct mlx5_compress_qp {
 	uint16_t entries_n;
 	uint16_t pi;
 	uint16_t ci;
-	volatile uint64_t *uar_addr;
 	struct mlx5_mr_ctrl mr_ctrl;
 	int socket_id;
 	struct mlx5_devx_cq cq;
@@ -208,8 +211,6 @@  struct mlx5_compress_qp {
 	qp->priv = priv;
 	qp->ops = (struct rte_comp_op **)RTE_ALIGN((uintptr_t)(qp + 1),
 						   RTE_CACHE_LINE_SIZE);
-	qp->uar_addr = mlx5_os_get_devx_uar_reg_addr(priv->uar);
-	MLX5_ASSERT(qp->uar_addr);
 	if (mlx5_common_verbs_reg_mr(priv->pd, opaq_buf, qp->entries_n *
 					sizeof(struct mlx5_gga_compress_opaque),
 							 &qp->opaque_mr) != 0) {
@@ -423,6 +424,24 @@  struct mlx5_compress_qp {
 	return dseg->lkey;
 }
 
+/*
+ * Provide a safe 64-bit store operation to the mlx5 UAR region for both
+ * 32-bit and 64-bit architectures.
+ */
+static __rte_always_inline void
+mlx5_compress_uar_write(uint64_t val, struct mlx5_compress_priv *priv)
+{
+#ifdef RTE_ARCH_64
+	*priv->uar_addr = val;
+#else /* !RTE_ARCH_64 */
+	rte_spinlock_lock(&priv->uar32_sl);
+	*(volatile uint32_t *)priv->uar_addr = val;
+	rte_io_wmb();
+	*((volatile uint32_t *)priv->uar_addr + 1) = val >> 32;
+	rte_spinlock_unlock(&priv->uar32_sl);
+#endif
+}
+
 static uint16_t
 mlx5_compress_enqueue_burst(void *queue_pair, struct rte_comp_op **ops,
 			    uint16_t nb_ops)
@@ -486,7 +505,7 @@  struct mlx5_compress_qp {
 	rte_io_wmb();
 	qp->sq.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(qp->pi);
 	rte_wmb();
-	*qp->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH.*/
+	mlx5_compress_uar_write(*(volatile uint64_t *)wqe, qp->priv);
 	rte_wmb();
 	return nb_ops;
 }
@@ -692,6 +711,11 @@  struct mlx5_compress_qp {
 		DRV_LOG(ERR, "Failed to allocate UAR.");
 		return -1;
 	}
+	priv->uar_addr = mlx5_os_get_devx_uar_reg_addr(priv->uar);
+	MLX5_ASSERT(priv->uar_addr);
+#ifndef RTE_ARCH_64
+	rte_spinlock_init(&priv->uar32_sl);
+#endif /* RTE_ARCH_64 */
 	return 0;
 }
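
As a usage note, the new helper is called at the tail of
mlx5_compress_enqueue_burst() in the hunk above. Condensed into an
illustrative helper (ring_doorbell() is a hypothetical name; the stores and
barriers are copied from the diff), the doorbell sequence reads:

/* Hypothetical condensation of the enqueue tail shown in the diff above. */
static __rte_always_inline void
ring_doorbell(struct mlx5_compress_qp *qp, volatile uint64_t *wqe)
{
	rte_io_wmb(); /* WQE contents must land before the doorbell record. */
	qp->sq.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(qp->pi);
	rte_wmb(); /* The doorbell record must land before the UAR write. */
	mlx5_compress_uar_write(*wqe, qp->priv); /* 8B store, or 2x4B + lock. */
	rte_wmb(); /* Flush the UAR write out to the device. */
}

Note that the spinlock is taken inside mlx5_compress_uar_write() only on
32-bit builds, so the 64-bit fast path pays no locking cost.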