[5/8] net/mlx5: prepare switch flow rule parser for encap offloads
diff mbox series

Message ID 20180831092038.23051-6-adrien.mazarguil@6wind.com
State Superseded, archived
Headers show
Series
  • net/mlx5: add switch offload for VXLAN encap/decap
Related show

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Adrien Mazarguil Aug. 31, 2018, 9:57 a.m. UTC
A mere message buffer is not enough to support the additional logic
required to manage flow rules with such offloads; a dedicated object
(struct mlx5_nl_flow) with the ability to store additional information and
adjustable target network interfaces is needed, as well as a context
object for shared data (struct mlx5_nl_flow_ctx).

The message sequence number is now a predictable counter stored in the
context object, instead of a value read from the CPU cycle counter.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
 drivers/net/mlx5/mlx5.c         |  18 ++--
 drivers/net/mlx5/mlx5.h         |  22 ++--
 drivers/net/mlx5/mlx5_flow.c    |  10 +-
 drivers/net/mlx5/mlx5_nl_flow.c | 189 ++++++++++++++++++++++++-----------
 4 files changed, 155 insertions(+), 84 deletions(-)

Patch
diff mbox series

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 9a504a31c..c10ca4ae5 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -282,8 +282,8 @@  mlx5_dev_close(struct rte_eth_dev *dev)
 		close(priv->nl_socket_route);
 	if (priv->nl_socket_rdma >= 0)
 		close(priv->nl_socket_rdma);
-	if (priv->mnl_socket)
-		mlx5_nl_flow_socket_destroy(priv->mnl_socket);
+	if (priv->nl_flow_ctx)
+		mlx5_nl_flow_ctx_destroy(priv->nl_flow_ctx);
 	ret = mlx5_hrxq_ibv_verify(dev);
 	if (ret)
 		DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -1136,13 +1136,13 @@  mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
 	if (vf && config.vf_nl_en)
 		mlx5_nl_mac_addr_sync(eth_dev);
-	priv->mnl_socket = mlx5_nl_flow_socket_create();
-	if (!priv->mnl_socket ||
+	priv->nl_flow_ctx = mlx5_nl_flow_ctx_create(eth_dev->device->numa_node);
+	if (!priv->nl_flow_ctx ||
 	    !priv->ifindex ||
-	    mlx5_nl_flow_ifindex_init(priv->mnl_socket, priv->ifindex,
+	    mlx5_nl_flow_ifindex_init(priv->nl_flow_ctx, priv->ifindex,
 				      &flow_error)) {
-		if (!priv->mnl_socket) {
-			flow_error.message = "cannot open libmnl socket";
+		if (!priv->nl_flow_ctx) {
+			flow_error.message = "cannot create NL flow context";
 		} else if (!priv->ifindex) {
 			rte_errno = ENXIO;
 			flow_error.message = "unknown network interface index";
@@ -1204,8 +1204,8 @@  mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			close(priv->nl_socket_route);
 		if (priv->nl_socket_rdma >= 0)
 			close(priv->nl_socket_rdma);
-		if (priv->mnl_socket)
-			mlx5_nl_flow_socket_destroy(priv->mnl_socket);
+		if (priv->nl_flow_ctx)
+			mlx5_nl_flow_ctx_destroy(priv->nl_flow_ctx);
 		if (own_domain_id)
 			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
 		rte_free(priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 287cfc643..210f4ea11 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -162,7 +162,8 @@  struct mlx5_nl_flow_ptoi {
 	unsigned int ifindex; /**< Network interface index. */
 };
 
-struct mnl_socket;
+struct mlx5_nl_flow;
+struct mlx5_nl_flow_ctx;
 
 struct priv {
 	LIST_ENTRY(priv) mem_event_cb; /* Called by memory event callback. */
@@ -229,7 +230,7 @@  struct priv {
 	rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];
 	/* UAR same-page access control required in 32bit implementations. */
 #endif
-	struct mnl_socket *mnl_socket; /* Libmnl socket. */
+	struct mlx5_nl_flow_ctx *nl_flow_ctx; /* Context for NL flow rules. */
 };
 
 #define PORT_ID(priv) ((priv)->dev_data->port_id)
@@ -396,21 +397,24 @@  int mlx5_nl_switch_info(int nl, unsigned int ifindex,
 
 /* mlx5_nl_flow.c */
 
-int mlx5_nl_flow_transpose(void *buf,
+int mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
 			   size_t size,
 			   const struct mlx5_nl_flow_ptoi *ptoi,
 			   const struct rte_flow_attr *attr,
 			   const struct rte_flow_item *pattern,
 			   const struct rte_flow_action *actions,
 			   struct rte_flow_error *error);
-void mlx5_nl_flow_brand(void *buf, uint32_t handle);
-int mlx5_nl_flow_create(struct mnl_socket *nl, void *buf,
+void mlx5_nl_flow_brand(struct mlx5_nl_flow *nl_flow, uint32_t handle);
+int mlx5_nl_flow_create(struct mlx5_nl_flow_ctx *ctx,
+			struct mlx5_nl_flow *nl_flow,
 			struct rte_flow_error *error);
-int mlx5_nl_flow_destroy(struct mnl_socket *nl, void *buf,
+int mlx5_nl_flow_destroy(struct mlx5_nl_flow_ctx *ctx,
+			 struct mlx5_nl_flow *nl_flow,
 			 struct rte_flow_error *error);
-int mlx5_nl_flow_ifindex_init(struct mnl_socket *nl, unsigned int ifindex,
+int mlx5_nl_flow_ifindex_init(struct mlx5_nl_flow_ctx *ctx,
+			      unsigned int ifindex,
 			      struct rte_flow_error *error);
-struct mnl_socket *mlx5_nl_flow_socket_create(void);
-void mlx5_nl_flow_socket_destroy(struct mnl_socket *nl);
+struct mlx5_nl_flow_ctx *mlx5_nl_flow_ctx_create(int socket);
+void mlx5_nl_flow_ctx_destroy(struct mlx5_nl_flow_ctx *ctx);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index f093a5ed0..77e510dc3 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -2485,7 +2485,7 @@  mlx5_flow_merge_switch(struct rte_eth_dev *dev,
 	ptoi[n].ifindex = 0;
 	if (flow_size < off)
 		flow_size = 0;
-	ret = mlx5_nl_flow_transpose((uint8_t *)flow + off,
+	ret = mlx5_nl_flow_transpose((void *)((uint8_t *)flow + off),
 				     flow_size ? flow_size - off : 0,
 				     ptoi, attr, pattern, actions, error);
 	if (ret < 0)
@@ -2885,8 +2885,8 @@  mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
 	struct priv *priv = dev->data->dev_private;
 	struct mlx5_flow_verbs *verbs;
 
-	if (flow->nl_flow && priv->mnl_socket)
-		mlx5_nl_flow_destroy(priv->mnl_socket, flow->nl_flow, NULL);
+	if (flow->nl_flow && priv->nl_flow_ctx)
+		mlx5_nl_flow_destroy(priv->nl_flow_ctx, flow->nl_flow, NULL);
 	LIST_FOREACH(verbs, &flow->verbs, next) {
 		if (verbs->flow) {
 			claim_zero(mlx5_glue->destroy_flow(verbs->flow));
@@ -2975,8 +2975,8 @@  mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
 		}
 	}
 	if (flow->nl_flow &&
-	    priv->mnl_socket &&
-	    mlx5_nl_flow_create(priv->mnl_socket, flow->nl_flow, error))
+	    priv->nl_flow_ctx &&
+	    mlx5_nl_flow_create(priv->nl_flow_ctx, flow->nl_flow, error))
 		goto error;
 	return 0;
 error:
diff --git a/drivers/net/mlx5/mlx5_nl_flow.c b/drivers/net/mlx5/mlx5_nl_flow.c
index e720728b7..d20416026 100644
--- a/drivers/net/mlx5/mlx5_nl_flow.c
+++ b/drivers/net/mlx5/mlx5_nl_flow.c
@@ -22,10 +22,10 @@ 
 #include <sys/socket.h>
 
 #include <rte_byteorder.h>
-#include <rte_cycles.h>
 #include <rte_errno.h>
 #include <rte_ether.h>
 #include <rte_flow.h>
+#include <rte_malloc.h>
 
 #include "mlx5.h"
 #include "mlx5_autoconf.h"
@@ -148,6 +148,23 @@  struct tc_vlan {
 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
 #endif
 
+/** Shared state (libmnl socket, sequence counter) for NL flow rules. */
+struct mlx5_nl_flow_ctx {
+	int socket; /**< NUMA socket for memory allocations. */
+	uint32_t seq; /**< Last sequence number used on @p nl (0: none yet). */
+	struct mnl_socket *nl; /**< @p NETLINK_ROUTE libmnl socket. */
+};
+
+/** Flow rule descriptor: header followed by its Netlink message. */
+struct mlx5_nl_flow {
+	uint32_t size; /**< Size of this object, @p msg included. */
+	uint32_t applied:1; /**< Whether rule is currently applied. */
+	unsigned int *ifindex_src; /**< Points to source ifindex in @p msg. */
+	unsigned int *ifindex_dst; /**< Points to target ifindex in @p msg. */
+	alignas(struct nlmsghdr)
+	uint8_t msg[]; /**< Netlink message data. */
+};
+
 /** Parser state definitions for mlx5_nl_flow_trans[]. */
 enum mlx5_nl_flow_trans {
 	INVALID,
@@ -350,10 +367,10 @@  mlx5_nl_flow_item_mask(const struct rte_flow_item *item,
  * Subsequent entries enable this function to resolve other DPDK port IDs
  * found in the flow rule.
  *
- * @param[out] buf
- *   Output message buffer. May be NULL when @p size is 0.
+ * @param[out] nl_flow
+ *   Output buffer. May be NULL when @p size is 0.
  * @param size
- *   Size of @p buf. Message may be truncated if not large enough.
+ *   Size of @p nl_flow. Output may be truncated if not large enough.
  * @param[in] ptoi
  *   DPDK port ID to network interface index translation table. This table
  *   is terminated by an entry with a zero ifindex value.
@@ -372,7 +389,7 @@  mlx5_nl_flow_item_mask(const struct rte_flow_item *item,
  *   otherwise and rte_errno is set.
  */
 int
-mlx5_nl_flow_transpose(void *buf,
+mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
 		       size_t size,
 		       const struct mlx5_nl_flow_ptoi *ptoi,
 		       const struct rte_flow_attr *attr,
@@ -380,8 +397,9 @@  mlx5_nl_flow_transpose(void *buf,
 		       const struct rte_flow_action *actions,
 		       struct rte_flow_error *error)
 {
-	alignas(struct nlmsghdr)
-	uint8_t buf_tmp[mnl_nlmsg_size(sizeof(struct tcmsg) + 1024)];
+	alignas(struct mlx5_nl_flow)
+	uint8_t buf_tmp[1024];
+	void *buf;
 	const struct rte_flow_item *item;
 	const struct rte_flow_action *action;
 	unsigned int n;
@@ -398,9 +416,15 @@  mlx5_nl_flow_transpose(void *buf,
 	const enum mlx5_nl_flow_trans *trans;
 	const enum mlx5_nl_flow_trans *back;
 
-	if (!size)
-		goto error_nobufs;
 init:
+	buf = NULL;
+	if (size < offsetof(struct mlx5_nl_flow, msg))
+		goto error_nobufs;
+	nl_flow->size = offsetof(struct mlx5_nl_flow, msg);
+	nl_flow->applied = 0;
+	nl_flow->ifindex_src = NULL;
+	nl_flow->ifindex_dst = NULL;
+	size -= nl_flow->size;
 	item = pattern;
 	action = actions;
 	n = 0;
@@ -483,15 +507,21 @@  mlx5_nl_flow_transpose(void *buf,
 				(error, ENOTSUP,
 				 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
 				 attr, "egress is not supported");
-		if (size < mnl_nlmsg_size(sizeof(*tcm)))
+		i = RTE_ALIGN_CEIL(nl_flow->size, alignof(struct nlmsghdr));
+		i -= nl_flow->size;
+		if (size < i + mnl_nlmsg_size(sizeof(*tcm)))
 			goto error_nobufs;
+		nl_flow->size += i;
+		buf = (void *)((uintptr_t)nl_flow + nl_flow->size);
+		size -= i;
 		nlh = mnl_nlmsg_put_header(buf);
-		nlh->nlmsg_type = 0;
+		nlh->nlmsg_type = RTM_NEWTFILTER;
 		nlh->nlmsg_flags = 0;
 		nlh->nlmsg_seq = 0;
 		tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
 		tcm->tcm_family = AF_UNSPEC;
 		tcm->tcm_ifindex = ptoi[0].ifindex;
+		nl_flow->ifindex_src = (unsigned int *)&tcm->tcm_ifindex;
 		/*
 		 * Let kernel pick a handle by default. A predictable handle
 		 * can be set by the caller on the resulting buffer through
@@ -893,6 +923,10 @@  mlx5_nl_flow_transpose(void *buf,
 		act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
 		if (!act)
 			goto error_nobufs;
+		nl_flow->ifindex_dst =
+			&((struct tc_mirred *)
+			  mnl_attr_get_payload
+			  (mnl_nlmsg_get_payload_tail(buf)))->ifindex;
 		if (!mnl_attr_put_check(buf, size, TCA_MIRRED_PARMS,
 					sizeof(struct tc_mirred),
 					&(struct tc_mirred){
@@ -1014,15 +1048,18 @@  mlx5_nl_flow_transpose(void *buf,
 		if (na_flower)
 			mnl_attr_nest_end(buf, na_flower);
 		nlh = buf;
-		return nlh->nlmsg_len;
+		buf = NULL;
+		size -= nlh->nlmsg_len;
+		nl_flow->size += nlh->nlmsg_len;
+		return nl_flow->size;
 	}
 	back = trans;
 	trans = mlx5_nl_flow_trans[trans[n - 1]];
 	n = 0;
 	goto trans;
 error_nobufs:
-	if (buf != buf_tmp) {
-		buf = buf_tmp;
+	if (nl_flow != (void *)buf_tmp) {
+		nl_flow = (void *)buf_tmp;
 		size = sizeof(buf_tmp);
 		goto init;
 	}
@@ -1037,14 +1074,15 @@  mlx5_nl_flow_transpose(void *buf,
  * This handle should be unique for a given network interface to avoid
  * collisions.
  *
- * @param buf
+ * @param nl_flow
  *   Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
  * @param handle
  *   Unique 32-bit handle to use.
  */
 void
-mlx5_nl_flow_brand(void *buf, uint32_t handle)
+mlx5_nl_flow_brand(struct mlx5_nl_flow *nl_flow, uint32_t handle)
 {
+	void *buf = nl_flow->msg;
 	struct tcmsg *tcm = mnl_nlmsg_get_payload(buf);
 
 	tcm->tcm_handle = handle;
@@ -1053,8 +1091,8 @@  mlx5_nl_flow_brand(void *buf, uint32_t handle)
 /**
  * Send Netlink message with acknowledgment and process reply.
  *
- * @param nl
- *   Libmnl socket to use.
+ * @param ctx
+ *   Context object initialized by mlx5_nl_flow_ctx_create().
  * @param nlh
  *   Message to send. This function always raises the NLM_F_ACK flag and
  *   sets its sequence number before sending.
@@ -1067,26 +1105,26 @@  mlx5_nl_flow_brand(void *buf, uint32_t handle)
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_nl_flow_chat(struct mnl_socket *nl, struct nlmsghdr *nlh,
+mlx5_nl_flow_chat(struct mlx5_nl_flow_ctx *ctx, struct nlmsghdr *nlh,
 		  mnl_cb_t cb, void *arg)
 {
 	alignas(struct nlmsghdr)
 	uint8_t ans[MNL_SOCKET_BUFFER_SIZE];
-	unsigned int portid = mnl_socket_get_portid(nl);
-	uint32_t seq = rte_get_tsc_cycles();
+	unsigned int portid = mnl_socket_get_portid(ctx->nl);
+	uint32_t seq = ++ctx->seq ? ctx->seq : ++ctx->seq;
 	int err = 0;
 	int ret;
 
 	nlh->nlmsg_flags |= NLM_F_ACK;
 	nlh->nlmsg_seq = seq;
-	ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
+	ret = mnl_socket_sendto(ctx->nl, nlh, nlh->nlmsg_len);
 	nlh = (void *)ans;
 	/*
 	 * The following loop postpones non-fatal errors until multipart
 	 * messages are complete.
 	 */
 	while (ret > 0) {
-		ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
+		ret = mnl_socket_recvfrom(ctx->nl, ans, sizeof(ans));
 		if (ret == -1) {
 			err = errno;
 			if (err != ENOSPC)
@@ -1113,9 +1151,9 @@  mlx5_nl_flow_chat(struct mnl_socket *nl, struct nlmsghdr *nlh,
 /**
  * Create a Netlink flow rule.
  *
- * @param nl
- *   Libmnl socket to use.
- * @param buf
+ * @param ctx
+ *   Context object initialized by mlx5_nl_flow_ctx_create().
+ * @param nl_flow
  *   Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
@@ -1124,15 +1162,19 @@  mlx5_nl_flow_chat(struct mnl_socket *nl, struct nlmsghdr *nlh,
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_nl_flow_create(struct mnl_socket *nl, void *buf,
+mlx5_nl_flow_create(struct mlx5_nl_flow_ctx *ctx, struct mlx5_nl_flow *nl_flow,
 		    struct rte_flow_error *error)
 {
-	struct nlmsghdr *nlh = buf;
+	struct nlmsghdr *nlh = (void *)nl_flow->msg;
 
+	if (nl_flow->applied)
+		return 0;
 	nlh->nlmsg_type = RTM_NEWTFILTER;
 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
-	if (!mlx5_nl_flow_chat(nl, nlh, NULL, NULL))
+	if (!mlx5_nl_flow_chat(ctx, nlh, NULL, NULL)) {
+		nl_flow->applied = 1;
 		return 0;
+	}
 	return rte_flow_error_set
 		(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
 		 "netlink: failed to create TC flow rule");
@@ -1141,9 +1183,12 @@  mlx5_nl_flow_create(struct mnl_socket *nl, void *buf,
 /**
  * Destroy a Netlink flow rule.
  *
- * @param nl
- *   Libmnl socket to use.
- * @param buf
+ * In case of error, no recovery is possible; caller must suppose flow rule
+ * was destroyed.
+ *
+ * @param ctx
+ *   Context object initialized by mlx5_nl_flow_ctx_create().
+ * @param nl_flow
  *   Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
@@ -1152,26 +1197,31 @@  mlx5_nl_flow_create(struct mnl_socket *nl, void *buf,
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_nl_flow_destroy(struct mnl_socket *nl, void *buf,
+mlx5_nl_flow_destroy(struct mlx5_nl_flow_ctx *ctx, struct mlx5_nl_flow *nl_flow,
 		     struct rte_flow_error *error)
 {
-	struct nlmsghdr *nlh = buf;
+	struct nlmsghdr *nlh = (void *)nl_flow->msg;
+	int ret;
 
+	if (!nl_flow->applied)
+		return 0; /* Rule is not applied, nothing to remove. */
 	nlh->nlmsg_type = RTM_DELTFILTER;
 	nlh->nlmsg_flags = NLM_F_REQUEST;
-	if (!mlx5_nl_flow_chat(nl, nlh, NULL, NULL))
+	ret = mlx5_nl_flow_chat(ctx, nlh, NULL, NULL);
+	nl_flow->applied = 0; /* Cleared even if the request failed. */
+	if (!ret)
 		return 0;
 	return rte_flow_error_set
-		(error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+		(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
 		 "netlink: failed to destroy TC flow rule");
 }
 
 /**
- * Initialize ingress qdisc of a given network interface.
+ * Initialize ingress qdisc of network interfaces.
  *
- * @param nl
- *   Libmnl socket of the @p NETLINK_ROUTE kind.
- * @param ifindex
+ * @param ctx
+ *   Context object initialized by mlx5_nl_flow_ctx_create().
+ * @param[in] ifindex
  *   Index of network interface to initialize.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
@@ -1180,7 +1230,8 @@  mlx5_nl_flow_destroy(struct mnl_socket *nl, void *buf,
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_nl_flow_ifindex_init(struct mnl_socket *nl, unsigned int ifindex,
+mlx5_nl_flow_ifindex_init(struct mlx5_nl_flow_ctx *ctx,
+			  const unsigned int ifindex,
 			  struct rte_flow_error *error)
 {
 	struct nlmsghdr *nlh;
@@ -1202,15 +1253,15 @@  mlx5_nl_flow_ifindex_init(struct mnl_socket *nl, unsigned int ifindex,
 			(error, ENOBUFS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 			 NULL, "netlink: not enough space for message");
 	/* Ignore errors when qdisc is already absent. */
-	if (mlx5_nl_flow_chat(nl, nlh, NULL, NULL) &&
-	    rte_errno != EINVAL && rte_errno != ENOENT)
+	if (mlx5_nl_flow_chat(ctx, nlh, NULL, NULL) &&
+	    rte_errno != EINVAL && rte_errno != ENOENT && rte_errno != EPERM)
 		return rte_flow_error_set
 			(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 			 NULL, "netlink: failed to remove ingress qdisc");
 	/* Create fresh ingress qdisc. */
 	nlh->nlmsg_type = RTM_NEWQDISC;
 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
-	if (mlx5_nl_flow_chat(nl, nlh, NULL, NULL))
+	if (mlx5_nl_flow_chat(ctx, nlh, NULL, NULL))
 		return rte_flow_error_set
 			(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 			 NULL, "netlink: failed to create ingress qdisc");
@@ -1218,34 +1269,50 @@  mlx5_nl_flow_ifindex_init(struct mnl_socket *nl, unsigned int ifindex,
 }
 
 /**
- * Create and configure a libmnl socket for Netlink flow rules.
+ * Create NL flow rule context object and open its NETLINK_ROUTE socket.
  *
+ * @param socket
+ *   NUMA socket to allocate the context from; also stored in it.
  * @return
- *   A valid libmnl socket object pointer on success, NULL otherwise and
- *   rte_errno is set.
+ *   A valid object on success, NULL otherwise and rte_errno is set.
  */
-struct mnl_socket *
-mlx5_nl_flow_socket_create(void)
+struct mlx5_nl_flow_ctx *
+mlx5_nl_flow_ctx_create(int socket)
 {
-	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
+	struct mlx5_nl_flow_ctx *ctx =
+		rte_zmalloc_socket(__func__, sizeof(*ctx), 0, socket);
 
-	if (nl) {
-		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
-				      sizeof(int));
-		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
-			return nl;
-	}
+	if (!ctx)
+		goto error; /* NOTE(review): confirm errno is set here. */
+	ctx->socket = socket;
+	ctx->seq = 0;
+	ctx->nl = mnl_socket_open(NETLINK_ROUTE);
+	if (!ctx->nl)
+		goto error;
+	mnl_socket_setsockopt(ctx->nl, NETLINK_CAP_ACK, &(int){ 1 },
+			      sizeof(int)); /* Result is not checked. */
+	if (mnl_socket_bind(ctx->nl, 0, MNL_SOCKET_AUTOPID))
+		goto error;
+	return ctx;
+error:
 	rte_errno = errno;
-	if (nl)
-		mnl_socket_close(nl);
+	if (ctx) {
+		if (ctx->nl)
+			mnl_socket_close(ctx->nl);
+		rte_free(ctx);
+	}
 	return NULL;
 }
 
 /**
- * Destroy a libmnl socket.
+ * Close the libmnl socket of a NL flow context and free the context.
+ *
+ * @param ctx
+ *   Context object from mlx5_nl_flow_ctx_create(); must not be NULL.
  */
 void
-mlx5_nl_flow_socket_destroy(struct mnl_socket *nl)
+mlx5_nl_flow_ctx_destroy(struct mlx5_nl_flow_ctx *ctx)
 {
-	mnl_socket_close(nl);
+	mnl_socket_close(ctx->nl);
+	rte_free(ctx);
 }