[v4] ethdev: add flow API to expand RSS flows

Message ID 20180627145525.8659-1-nelio.laranjeiro@6wind.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series [v4] ethdev: add flow API to expand RSS flows |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Nélio Laranjeiro June 27, 2018, 2:55 p.m. UTC
  Introduce an helper for PMD to expand easily flows items list with RSS
action into multiple flow items lists with priority information.

For instance a user items list being "eth / end" with rss action types
"ipv4-udp ipv6-udp end" needs to be expanded into three items lists:

 - eth
 - eth / ipv4 / udp
 - eth / ipv6 / udp

to match the user request.  Some drivers are unable to reach such
request without this expansion, this API is there to help those.
Only PMD should use such API for their internal cooking, the application
will still handle a single flow.

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>

---

Changes in v4:

- Replace the expanded algorithm with a graph to support also tunnel
pattern matching.

Changes in v3:

- Fix a segmentation fault due to an uninitialized pointer.

Changes in v2:

- Fix expansion for UDP/TCP layers where L3 may not be in the original
  items list and thus is missing in the expansion.
- Fix size verification for some layers causing a segfault
---
 lib/librte_ethdev/rte_flow.c        | 105 ++++++++++++++++++++++++++++
 lib/librte_ethdev/rte_flow_driver.h |  48 +++++++++++++
 2 files changed, 153 insertions(+)
  

Comments

Adrien Mazarguil June 28, 2018, 12:33 p.m. UTC | #1
On Wed, Jun 27, 2018 at 04:55:25PM +0200, Nelio Laranjeiro wrote:
> Introduce an helper for PMD to expand easily flows items list with RSS
> action into multiple flow items lists with priority information.
> 
> For instance a user items list being "eth / end" with rss action types
> "ipv4-udp ipv6-udp end" needs to be expanded into three items lists:
> 
>  - eth
>  - eth / ipv4 / udp
>  - eth / ipv6 / udp
> 
> to match the user request.  Some drivers are unable to reach such
> request without this expansion, this API is there to help those.
> Only PMD should use such API for their internal cooking, the application
> will still handle a single flow.
> 
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> ---
> 
> Changes in v4:
> 
> - Replace the expanded algorithm with a graph to support also tunnel
> pattern matching.

Looks like a useful helper, nice to see that it's much shorter than the
previous versions. A few nits below, mainly suggestions for documentation.

<snip>
> +
> +int
> +rte_flow_expand_rss(struct rte_flow_expand_rss *buf, size_t size,
> +		    const struct rte_flow_item *pat, uint64_t types,
> +		    const struct rte_flow_expand_node nodes[],
> +		    const int node_entry_index)

Please add short documentation reminder on top of this definition (i.e. a
copy of the first sentence from rte_flow_driver.h).

> +{
> +	const int elt_n = 8;
> +	const struct rte_flow_item *item;
> +	const struct rte_flow_expand_node *node = &nodes[node_entry_index];
> +	const int *next_node;
> +	const int *stack[elt_n];
> +	int stack_n = 0;
> +	struct rte_flow_item flow_items[elt_n];
> +	unsigned int i;
> +	size_t lsize;
> +	size_t user_pattern_size = 0;
> +	void *addr = NULL;
> +
> +	lsize = sizeof(*buf) +
> +		/* Size for the list of patterns. */
> +		sizeof(*buf->patterns) +
> +		RTE_ALIGN_CEIL(elt_n * sizeof(*item), sizeof(void *)) +
> +		/* Size for priorities. */
> +		sizeof(*buf->priority) +
> +		RTE_ALIGN_CEIL(elt_n * sizeof(uint32_t), sizeof(void *));
> +	if (lsize <= size) {
> +		buf->priority = (void *)(buf + 1);
> +		buf->priority[0] = 0;
> +		buf->patterns = (void *)&buf->priority[elt_n];
> +		buf->patterns[0] = (void *)&buf->patterns[elt_n];
> +		buf->entries = 0;
> +		addr = buf->patterns[0];
> +	}
> +	for (item = pat; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
> +		const struct rte_flow_expand_node *next = NULL;
> +
> +		for (i = 0; node->next && node->next[i]; ++i) {
> +			next = &nodes[node->next[i]];
> +			if (next->type == item->type)
> +				break;
> +		}
> +		if (next)
> +			node = next;
> +		user_pattern_size += sizeof(*item);
> +	}
> +	user_pattern_size += sizeof(*item); /**< Handle END item. */

This comment shouldn't be in Doxygen format.

> +	lsize += user_pattern_size;
> +	/* Copy the user pattern in the first entry of the buffer. */
> +	if (lsize <= size) {
> +		rte_memcpy(addr, pat, user_pattern_size);
> +		addr = (void *)(((uintptr_t)addr) + user_pattern_size);
> +		buf->priority[buf->entries] = 0;
> +		buf->entries = 1;
> +	}
> +	/* Start expanding. */
> +	memset(flow_items, 0, sizeof(flow_items));
> +	user_pattern_size -= sizeof(*item);
> +	next_node = node->next;
> +	stack[stack_n] = next_node;
> +	node = next_node ? &nodes[*next_node] : NULL;
> +	while (node) {
> +		flow_items[stack_n].type = node->type;
> +		if ((node->rss_types & types) == node->rss_types) {
> +			/*
> +			 * compute the number of items to copy from the
> +			 * expansion and copy it.
> +			 * When the stack_n is 0, there are 1 element in it,
> +			 * plus the addition END item.
> +			 */
> +			int elt = stack_n + 2;
> +
> +			flow_items[stack_n + 1].type = RTE_FLOW_ITEM_TYPE_END;
> +			lsize += elt * sizeof(*item) + user_pattern_size;
> +			if (lsize <= size) {
> +				size_t n = elt * sizeof(*item);
> +
> +				buf->priority[buf->entries] = stack_n + 1;
> +				buf->patterns[buf->entries++] = addr;
> +				rte_memcpy(addr, buf->patterns[0],
> +					   user_pattern_size);
> +				addr = (void *)(((uintptr_t)addr) +
> +						user_pattern_size);
> +				rte_memcpy(addr, flow_items, n);
> +				addr = (void *) (((uintptr_t)addr) + n);

Extra space after (void *).

> +			}
> +		}
> +		/* Go deeper. */
> +		if (node->next) {
> +			next_node = node->next;
> +			stack[++stack_n] = next_node;

Since stack[] contains at most elt_n (8) elements, even assuming it's always
plenty enough, I think it would be wise to check for any overflow before
incrementing stack_n and return an error if so.

> +		} else if (*(next_node + 1)) {
> +			/* Follow up with the next possibility. */
> +			++next_node;
> +		} else {
> +			/* Move to the next path. */
> +			if (stack_n)
> +				next_node = stack[--stack_n];
> +			next_node++;
> +			stack[stack_n] = next_node;
> +		}
> +		node = *next_node ? &nodes[*next_node] : NULL;
> +	};
> +	return lsize;
> +}
> diff --git a/lib/librte_ethdev/rte_flow_driver.h b/lib/librte_ethdev/rte_flow_driver.h
> index 1c90c600d..538b46e54 100644
> --- a/lib/librte_ethdev/rte_flow_driver.h
> +++ b/lib/librte_ethdev/rte_flow_driver.h
> @@ -114,6 +114,54 @@ struct rte_flow_ops {
>  const struct rte_flow_ops *
>  rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error);
>  
> +/** Macro to create the expansion graph. */

=> Helper macro to build input graph for rte_flow_expand_rss().

Mostly rewording suggestions from here on.

> +#define RTE_FLOW_EXPAND_ITEMS(...) \

You should keep the same prefix for all objects associated with this
function. Everything should start with "rte_flow_expand_rss" for
consistency.

=> RTE_FLOW_EXPAND_RSS_NEXT(...)

> +	(const int []){ \
> +		__VA_ARGS__, 0, \
> +	}
> +
> +/** structure to generate the expansion graph. */

=> Node object of input graph for rte_flow_expand_rss().

> +struct rte_flow_expand_node {

=> struct rte_flow_expand_rss_node

> +	const int *const next; /**< list of items finalised by 0. */

=> List of next node indexes. Index 0 is interpreted as a terminator.

> +	const enum rte_flow_item_type type; /**< Item type to add. */

=> Pattern item type of current node.

> +	uint64_t rss_types; /**< RSS bit-field value. */

=> RSS types bit-field associated with this node (see ETH_RSS_* definitions).

> +};
> +
> +/**
> + * Expansion structure for RSS flows.
> + */

=> Object returned by rte_flow_expand_rss().

This block could fit a single line by the way.

> +struct rte_flow_expand_rss {
> +	uint32_t entries; /**< Number of entries in the following arrays. */

=> Number of entries in @p patterns and @p priorities.

> +	struct rte_flow_item **patterns; /**< Expanded pattern array. */
> +	uint32_t *priority; /**< Priority offset for each expansion. */

How about priority => priorities since there are as many of them as
patterns?

> +};

Another suggestion regarding this structure definition, since it's entirely
written by rte_flow_expand_rss() based on an arbitrarily-sized user-provided
buffer, and given it's not a public API, a flexible array member could make
sense.

This would highlight the link between patterns and priorities and make the
result more convenient to use thanks to fewer pointers:

 struct rte_flow_expand_rss {
     uint32_t entries;
     struct {
          struct rte_flow_item *pattern;
          uint32_t priority;
     }  entry[];
 };

> +
> +/**
> + * Expand RSS flows into several possible flows according to the RSS hash
> + * fields requested and the driver capabilities.
> + *
> + * @param[in,out] buf

Since this buffer is only written to: @param[in,out] => @param[out]

> + *   Buffer to store the result expansion.

=> Buffer for expansion result. May be truncated if @p size is not large
   enough.

> + * @param[in] size
> + *   Size in octets of the buffer.

=> Buffer size in bytes. If 0, @p buf can be NULL.

> + * @param[in] pat

pat => pattern

> + *   User flow pattern.
> + * @param[in] types
> + *   RSS types expected (see ETH_RSS_*).

=> RSS types to expand (see ETH_RSS_* definitions).

> + * @param[in] nodes.

nodes => graph

> + *   Expansion graph of possibilities for the RSS.

=> Input graph to expand @p pattern according to @p types.

> + * @param[in] node_entry_index

"[in]" is unnecessary since it's not a pointer.

node_entry_index => graph_root_index

> + *   The index in the \p nodes array as start point.

Let's keep "@" instead of "\" for directives.

=> Index of root node in @p graph, typically 0.

> + *
> + * @return
> + *   The size in octets used to expand.

=> A positive value representing the size of @p buf in bytes regardless of
   @p size on success, a negative errno value otherwise and rte_errno is
   set.

> + */
> +int
> +rte_flow_expand_rss(struct rte_flow_expand_rss *buf, size_t size,
> +		    const struct rte_flow_item *pat, uint64_t types,

pat => pattern

> +		    const struct rte_flow_expand_node nodes[],
> +		    const int node_entry_index);

No need for node_entry_index to be const.

> +
>  #ifdef __cplusplus
>  }
>  #endif
> -- 
> 2.18.0
>
  

Patch

diff --git a/lib/librte_ethdev/rte_flow.c b/lib/librte_ethdev/rte_flow.c
index 2e87e59f3..96af18e16 100644
--- a/lib/librte_ethdev/rte_flow.c
+++ b/lib/librte_ethdev/rte_flow.c
@@ -526,3 +526,108 @@  rte_flow_copy(struct rte_flow_desc *desc, size_t len,
 	}
 	return 0;
 }
+
+int
+rte_flow_expand_rss(struct rte_flow_expand_rss *buf, size_t size,
+		    const struct rte_flow_item *pat, uint64_t types,
+		    const struct rte_flow_expand_node nodes[],
+		    const int node_entry_index)
+{
+	const int elt_n = 8;
+	const struct rte_flow_item *item;
+	const struct rte_flow_expand_node *node = &nodes[node_entry_index];
+	const int *next_node;
+	const int *stack[elt_n];
+	int stack_n = 0;
+	struct rte_flow_item flow_items[elt_n];
+	unsigned int i;
+	size_t lsize;
+	size_t user_pattern_size = 0;
+	void *addr = NULL;
+
+	lsize = sizeof(*buf) +
+		/* Size for the list of patterns. */
+		sizeof(*buf->patterns) +
+		RTE_ALIGN_CEIL(elt_n * sizeof(*item), sizeof(void *)) +
+		/* Size for priorities. */
+		sizeof(*buf->priority) +
+		RTE_ALIGN_CEIL(elt_n * sizeof(uint32_t), sizeof(void *));
+	if (lsize <= size) {
+		buf->priority = (void *)(buf + 1);
+		buf->priority[0] = 0;
+		buf->patterns = (void *)&buf->priority[elt_n];
+		buf->patterns[0] = (void *)&buf->patterns[elt_n];
+		buf->entries = 0;
+		addr = buf->patterns[0];
+	}
+	for (item = pat; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
+		const struct rte_flow_expand_node *next = NULL;
+
+		for (i = 0; node->next && node->next[i]; ++i) {
+			next = &nodes[node->next[i]];
+			if (next->type == item->type)
+				break;
+		}
+		if (next)
+			node = next;
+		user_pattern_size += sizeof(*item);
+	}
+	user_pattern_size += sizeof(*item); /**< Handle END item. */
+	lsize += user_pattern_size;
+	/* Copy the user pattern in the first entry of the buffer. */
+	if (lsize <= size) {
+		rte_memcpy(addr, pat, user_pattern_size);
+		addr = (void *)(((uintptr_t)addr) + user_pattern_size);
+		buf->priority[buf->entries] = 0;
+		buf->entries = 1;
+	}
+	/* Start expanding. */
+	memset(flow_items, 0, sizeof(flow_items));
+	user_pattern_size -= sizeof(*item);
+	next_node = node->next;
+	stack[stack_n] = next_node;
+	node = next_node ? &nodes[*next_node] : NULL;
+	while (node) {
+		flow_items[stack_n].type = node->type;
+		if ((node->rss_types & types) == node->rss_types) {
+			/*
+			 * compute the number of items to copy from the
+			 * expansion and copy it.
+			 * When the stack_n is 0, there are 1 element in it,
+			 * plus the addition END item.
+			 */
+			int elt = stack_n + 2;
+
+			flow_items[stack_n + 1].type = RTE_FLOW_ITEM_TYPE_END;
+			lsize += elt * sizeof(*item) + user_pattern_size;
+			if (lsize <= size) {
+				size_t n = elt * sizeof(*item);
+
+				buf->priority[buf->entries] = stack_n + 1;
+				buf->patterns[buf->entries++] = addr;
+				rte_memcpy(addr, buf->patterns[0],
+					   user_pattern_size);
+				addr = (void *)(((uintptr_t)addr) +
+						user_pattern_size);
+				rte_memcpy(addr, flow_items, n);
+				addr = (void *) (((uintptr_t)addr) + n);
+			}
+		}
+		/* Go deeper. */
+		if (node->next) {
+			next_node = node->next;
+			stack[++stack_n] = next_node;
+		} else if (*(next_node + 1)) {
+			/* Follow up with the next possibility. */
+			++next_node;
+		} else {
+			/* Move to the next path. */
+			if (stack_n)
+				next_node = stack[--stack_n];
+			next_node++;
+			stack[stack_n] = next_node;
+		}
+		node = *next_node ? &nodes[*next_node] : NULL;
+	};
+	return lsize;
+}
diff --git a/lib/librte_ethdev/rte_flow_driver.h b/lib/librte_ethdev/rte_flow_driver.h
index 1c90c600d..538b46e54 100644
--- a/lib/librte_ethdev/rte_flow_driver.h
+++ b/lib/librte_ethdev/rte_flow_driver.h
@@ -114,6 +114,54 @@  struct rte_flow_ops {
 const struct rte_flow_ops *
 rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error);
 
+/** Macro to create the expansion graph. */
+#define RTE_FLOW_EXPAND_ITEMS(...) \
+	(const int []){ \
+		__VA_ARGS__, 0, \
+	}
+
+/** structure to generate the expansion graph. */
+struct rte_flow_expand_node {
+	const int *const next; /**< list of items finalised by 0. */
+	const enum rte_flow_item_type type; /**< Item type to add. */
+	uint64_t rss_types; /**< RSS bit-field value. */
+};
+
+/**
+ * Expansion structure for RSS flows.
+ */
+struct rte_flow_expand_rss {
+	uint32_t entries; /**< Number of entries in the following arrays. */
+	struct rte_flow_item **patterns; /**< Expanded pattern array. */
+	uint32_t *priority; /**< Priority offset for each expansion. */
+};
+
+/**
+ * Expand RSS flows into several possible flows according to the RSS hash
+ * fields requested and the driver capabilities.
+ *
+ * @param[in,out] buf
+ *   Buffer to store the result expansion.
+ * @param[in] size
+ *   Size in octets of the buffer.
+ * @param[in] pat
+ *   User flow pattern.
+ * @param[in] types
+ *   RSS types expected (see ETH_RSS_*).
+ * @param[in] nodes.
+ *   Expansion graph of possibilities for the RSS.
+ * @param[in] node_entry_index
+ *   The index in the \p nodes array as start point.
+ *
+ * @return
+ *   The size in octets used to expand.
+ */
+int
+rte_flow_expand_rss(struct rte_flow_expand_rss *buf, size_t size,
+		    const struct rte_flow_item *pat, uint64_t types,
+		    const struct rte_flow_expand_node nodes[],
+		    const int node_entry_index);
+
 #ifdef __cplusplus
 }
 #endif