[4/8] net/mlx5: enhance TC flow rule send/ack function

Message ID 20180831092038.23051-5-adrien.mazarguil@6wind.com (mailing list archive)
State Superseded, archived
Headers
Series net/mlx5: add switch offload for VXLAN encap/decap |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Adrien Mazarguil Aug. 31, 2018, 9:57 a.m. UTC
  A callback parameter to process replies will be useful for subsequent work
in this area. It implies the following:

- Replies may be much larger than requests. In fact their size cannot
  really be known in advance. Using MNL_SOCKET_BUFFER_SIZE (at least 8192
  bytes) is the recommended approach to make truncation less likely (look
  for NLMSG_GOODSIZE in Linux).

- Multipart replies are made of several messages. A loop is needed to
  process these.

- If a message is truncated (which cannot be ruled out since reply sizes
  are not known in advance), its remaining parts must be flushed to
  prevent their reception by subsequent queries.

- Using rte_get_tsc_cycles() instead of random() for message sequence
  numbers is faster, yet still unlikely to pick the same number twice in
  a row.

- mlx5_nl_flow_init() can be simplified since the query message is never
  overwritten (this was in fact already the case).

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
 drivers/net/mlx5/mlx5_nl_flow.c | 73 ++++++++++++++++++++++++------------
 1 file changed, 48 insertions(+), 25 deletions(-)
  

Patch

diff --git a/drivers/net/mlx5/mlx5_nl_flow.c b/drivers/net/mlx5/mlx5_nl_flow.c
index 9ea2a1b55..e720728b7 100644
--- a/drivers/net/mlx5/mlx5_nl_flow.c
+++ b/drivers/net/mlx5/mlx5_nl_flow.c
@@ -22,6 +22,7 @@ 
 #include <sys/socket.h>
 
 #include <rte_byteorder.h>
+#include <rte_cycles.h>
 #include <rte_errno.h>
 #include <rte_ether.h>
 #include <rte_flow.h>
@@ -1050,38 +1051,63 @@  mlx5_nl_flow_brand(void *buf, uint32_t handle)
 }
 
 /**
- * Send Netlink message with acknowledgment.
+ * Send Netlink message with acknowledgment and process reply.
  *
  * @param nl
  *   Libmnl socket to use.
  * @param nlh
- *   Message to send. This function always raises the NLM_F_ACK flag before
- *   sending.
+ *   Message to send. This function always raises the NLM_F_ACK flag and
+ *   sets its sequence number before sending.
+ * @param cb
+ *   Callback handler for received message.
+ * @param arg
+ *   Data pointer for callback handler.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_nl_flow_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
+mlx5_nl_flow_chat(struct mnl_socket *nl, struct nlmsghdr *nlh,
+		  mnl_cb_t cb, void *arg)
 {
 	alignas(struct nlmsghdr)
-	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
-		    nlh->nlmsg_len - sizeof(*nlh)];
-	uint32_t seq = random();
+	uint8_t ans[MNL_SOCKET_BUFFER_SIZE];
+	unsigned int portid = mnl_socket_get_portid(nl);
+	uint32_t seq = rte_get_tsc_cycles();
+	int err = 0;
 	int ret;
 
 	nlh->nlmsg_flags |= NLM_F_ACK;
 	nlh->nlmsg_seq = seq;
 	ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
-	if (ret != -1)
+	nlh = (void *)ans;
+	/*
+	 * The following loop postpones non-fatal errors until multipart
+	 * messages are complete.
+	 */
+	while (ret > 0) {
 		ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
-	if (ret != -1)
-		ret = mnl_cb_run
-			(ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
-	if (!ret)
+		if (ret == -1) {
+			err = errno;
+			if (err != ENOSPC)
+				break;
+			ret = sizeof(*nlh);
+		}
+		if (!err) {
+			ret = mnl_cb_run(nlh, ret, seq, portid, cb, arg);
+			if (ret < 0)
+				err = -ret;
+		}
+		if (!(nlh->nlmsg_flags & NLM_F_MULTI) ||
+		    nlh->nlmsg_type == NLMSG_DONE)
+			ret = -err;
+		else
+			ret = 1;
+	}
+	if (!err)
 		return 0;
-	rte_errno = errno;
-	return -rte_errno;
+	rte_errno = err;
+	return -err;
 }
 
 /**
@@ -1105,7 +1131,7 @@  mlx5_nl_flow_create(struct mnl_socket *nl, void *buf,
 
 	nlh->nlmsg_type = RTM_NEWTFILTER;
 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
-	if (!mlx5_nl_flow_nl_ack(nl, nlh))
+	if (!mlx5_nl_flow_chat(nl, nlh, NULL, NULL))
 		return 0;
 	return rte_flow_error_set
 		(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
@@ -1133,7 +1159,7 @@  mlx5_nl_flow_destroy(struct mnl_socket *nl, void *buf,
 
 	nlh->nlmsg_type = RTM_DELTFILTER;
 	nlh->nlmsg_flags = NLM_F_REQUEST;
-	if (!mlx5_nl_flow_nl_ack(nl, nlh))
+	if (!mlx5_nl_flow_chat(nl, nlh, NULL, NULL))
 		return 0;
 	return rte_flow_error_set
 		(error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
@@ -1171,23 +1197,20 @@  mlx5_nl_flow_ifindex_init(struct mnl_socket *nl, unsigned int ifindex,
 	tcm->tcm_ifindex = ifindex;
 	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
 	tcm->tcm_parent = TC_H_INGRESS;
+	if (!mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress"))
+		return rte_flow_error_set
+			(error, ENOBUFS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			 NULL, "netlink: not enough space for message");
 	/* Ignore errors when qdisc is already absent. */
-	if (mlx5_nl_flow_nl_ack(nl, nlh) &&
+	if (mlx5_nl_flow_chat(nl, nlh, NULL, NULL) &&
 	    rte_errno != EINVAL && rte_errno != ENOENT)
 		return rte_flow_error_set
 			(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 			 NULL, "netlink: failed to remove ingress qdisc");
 	/* Create fresh ingress qdisc. */
-	nlh = mnl_nlmsg_put_header(buf);
 	nlh->nlmsg_type = RTM_NEWQDISC;
 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
-	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
-	tcm->tcm_family = AF_UNSPEC;
-	tcm->tcm_ifindex = ifindex;
-	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
-	tcm->tcm_parent = TC_H_INGRESS;
-	mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
-	if (mlx5_nl_flow_nl_ack(nl, nlh))
+	if (mlx5_nl_flow_chat(nl, nlh, NULL, NULL))
 		return rte_flow_error_set
 			(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 			 NULL, "netlink: failed to create ingress qdisc");