[v3,1/4] ethdev: introduce IP reassembly offload

Message ID 20220130175935.1947730-2-gakhil@marvell.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series ethdev: introduce IP reassembly offload |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Akhil Goyal Jan. 30, 2022, 5:59 p.m. UTC
  IP Reassembly is a costly operation if it is done in software.
The operation becomes even costlier if IP fragments are encrypted.
However, if it is offloaded to HW, it can considerably save application
cycles.

Hence, a new offload RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY is introduced in
ethdev for devices which can attempt reassembly of packets in hardware.
rte_eth_dev_info is updated with the reassembly capabilities which a device
can support.

The resulting reassembled packet would be a typical segmented mbuf in
case of success.

And if reassembly of fragments fails or is incomplete (if fragments do
not arrive before the reass_timeout), the mbuf ol_flags can be updated.
This is updated in a subsequent patch.

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
 devtools/libabigail.abignore |  5 +++++
 doc/guides/nics/features.rst | 11 +++++++++++
 lib/ethdev/rte_ethdev.c      |  1 +
 lib/ethdev/rte_ethdev.h      | 28 +++++++++++++++++++++++++++-
 4 files changed, 44 insertions(+), 1 deletion(-)
  

Comments

Ferruh Yigit Feb. 1, 2022, 2:11 p.m. UTC | #1
On 1/30/2022 5:59 PM, Akhil Goyal wrote:
> IP Reassembly is a costly operation if it is done in software.
> The operation becomes even more costlier if IP fragments are encrypted.
> However, if it is offloaded to HW, it can considerably save application
> cycles.
> 
> Hence, a new offload RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY is introduced in
> ethdev for devices which can attempt reassembly of packets in hardware.
> rte_eth_dev_info is updated with the reassembly capabilities which a device
> can support.
> 
> The resulting reassembled packet would be a typical segmented mbuf in
> case of success.
> 
> And if reassembly of fragments is failed or is incomplete (if fragments do
> not come before the reass_timeout), the mbuf ol_flags can be updated.
> This is updated in a subsequent patch.
> 
> Signed-off-by: Akhil Goyal <gakhil@marvell.com>
> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> ---
>   devtools/libabigail.abignore |  5 +++++
>   doc/guides/nics/features.rst | 11 +++++++++++
>   lib/ethdev/rte_ethdev.c      |  1 +
>   lib/ethdev/rte_ethdev.h      | 28 +++++++++++++++++++++++++++-
>   4 files changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/devtools/libabigail.abignore b/devtools/libabigail.abignore
> index 4b676f317d..90f449c43a 100644
> --- a/devtools/libabigail.abignore
> +++ b/devtools/libabigail.abignore
> @@ -11,3 +11,8 @@
>   ; Ignore generated PMD information strings
>   [suppress_variable]
>           name_regexp = _pmd_info$
> +
> +; Ignore fields inserted in place of reserved_64s of rte_eth_dev_info
> +[suppress_type]
> +	name = rte_eth_dev_info
> +	has_data_member_inserted_between = {offset_of(reserved_64s), end}
> diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst
> index 27be2d2576..b45bce4a78 100644
> --- a/doc/guides/nics/features.rst
> +++ b/doc/guides/nics/features.rst
> @@ -602,6 +602,17 @@ Supports inner packet L4 checksum.
>     ``tx_offload_capa,tx_queue_offload_capa:RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM``.
>   
>   
> +.. _nic_features_ip_reassembly:
> +
> +IP reassembly
> +-------------
> +
> +Supports IP reassembly in hardware.
> +
> +* **[uses]     rte_eth_rxconf,rte_eth_rxmode**: ``offloads:RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY``.
> +* **[provides] rte_eth_dev_info**: ``reass_capa``.
> +
> +
>   .. _nic_features_shared_rx_queue:
>   
>   Shared Rx queue
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index a1d475a292..d9a03f12f9 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -126,6 +126,7 @@ static const struct {
>   	RTE_RX_OFFLOAD_BIT2STR(OUTER_UDP_CKSUM),
>   	RTE_RX_OFFLOAD_BIT2STR(RSS_HASH),
>   	RTE_RX_OFFLOAD_BIT2STR(BUFFER_SPLIT),
> +	RTE_RX_OFFLOAD_BIT2STR(IP_REASSEMBLY),
>   };
>   
>   #undef RTE_RX_OFFLOAD_BIT2STR
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index fa299c8ad7..cfaf7a5afc 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -1586,6 +1586,7 @@ struct rte_eth_conf {
>   #define RTE_ETH_RX_OFFLOAD_RSS_HASH         RTE_BIT64(19)
>   #define DEV_RX_OFFLOAD_RSS_HASH             RTE_ETH_RX_OFFLOAD_RSS_HASH
>   #define RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT     RTE_BIT64(20)
> +#define RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY    RTE_BIT64(21)
>   
>   #define RTE_ETH_RX_OFFLOAD_CHECKSUM (RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | \
>   				 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | \
> @@ -1781,6 +1782,29 @@ enum rte_eth_representor_type {
>   	RTE_ETH_REPRESENTOR_PF,   /**< representor of Physical Function. */
>   };
>   
> +/* Flag to offload IP reassembly for IPv4 packets. */
> +#define RTE_ETH_DEV_REASSEMBLY_F_IPV4 (RTE_BIT32(0))
> +/* Flag to offload IP reassembly for IPv6 packets. */
> +#define RTE_ETH_DEV_REASSEMBLY_F_IPV6 (RTE_BIT32(1))
> +/**
> + * A structure used to get/set IP reassembly configuration.
> + *
> + * If RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY flag is set in offloads field,
> + * the PMD will attempt IP reassembly for the received packets as per
> + * properties defined in this structure.
> + */
> +struct rte_eth_ip_reass_params {

As a generic comment, what do you think about using the full 'reassembly'
instead of the short 'reass' version, to clarify/simplify the meaning?

> +	/** Maximum time in ms which PMD can wait for other fragments. */
> +	uint32_t reass_timeout_ms;
> +	/** Maximum number of fragments that can be reassembled. */
> +	uint16_t max_frags;
> +	/**
> +	 * Flags to enable reassembly of packet types -
> +	 * RTE_ETH_DEV_REASSEMBLY_F_xxx.
> +	 */
> +	uint16_t flags;
> +};
> +
>   /**
>    * A structure used to retrieve the contextual information of
>    * an Ethernet device, such as the controlling driver of the
> @@ -1841,8 +1865,10 @@ struct rte_eth_dev_info {
>   	 * embedded managed interconnect/switch.
>   	 */
>   	struct rte_eth_switch_info switch_info;
> +	/** IP reassembly offload capabilities that a device can support. */
> +	struct rte_eth_ip_reass_params reass_capa;
>   

"struct rte_eth_dev_info" & 'rte_eth_dev_info_get()' are very common,
all applications that use net devices and even some internal APIs rely on
this struct and API.
It makes me uneasy to extend this struct with rarely used features,
worrying that loading too much (capability/status/config) onto a single
API/struct can lead to unmaintainable code over time.

Also most of the time (if not always) offload flag is just an on/off flag
to the PMD, application set/unset offload flag and PMD knows what to do.
But for this case some capability variables, and a configuration API is
required/involved.

Considering the above two points, what do you think about implementing this
as control plane APIs instead of an offload flag?
There are already 'conf_set()' and 'conf_get()' APIs introduced in the coming
patches; introducing an additional 'capability_get()' API removes the need
to change "struct rte_eth_dev_info", and 'RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY'
can be removed.
Thomas, Andrew, what do you think?


> -	uint64_t reserved_64s[2]; /**< Reserved for future fields */
> +	uint64_t reserved_64s[1]; /**< Reserved for future fields */
>   	void *reserved_ptrs[2];   /**< Reserved for future fields */
>   };
>
  
Akhil Goyal Feb. 2, 2022, 10:57 a.m. UTC | #2
> > +/* Flag to offload IP reassembly for IPv4 packets. */
> > +#define RTE_ETH_DEV_REASSEMBLY_F_IPV4 (RTE_BIT32(0))
> > +/* Flag to offload IP reassembly for IPv6 packets. */
> > +#define RTE_ETH_DEV_REASSEMBLY_F_IPV6 (RTE_BIT32(1))
> > +/**
> > + * A structure used to get/set IP reassembly configuration.
> > + *
> > + * If RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY flag is set in offloads field,
> > + * the PMD will attempt IP reassembly for the received packets as per
> > + * properties defined in this structure.
> > + */
> > +struct rte_eth_ip_reass_params {
> 
> As a generic comment, what do you think to use full 'reassembly' instead
> of 'reass' short version, to clarify/simplify the meaning?

The full word 'reassembly' is used in most places, but here the struct name would be too long.
IMO, 'reass' is good enough here, though I have no strong opinion. I will change it if you insist.

> 
> > +	/** Maximum time in ms which PMD can wait for other fragments. */
> > +	uint32_t reass_timeout_ms;
> > +	/** Maximum number of fragments that can be reassembled. */
> > +	uint16_t max_frags;
> > +	/**
> > +	 * Flags to enable reassembly of packet types -
> > +	 * RTE_ETH_DEV_REASSEMBLY_F_xxx.
> > +	 */
> > +	uint16_t flags;
> > +};
> > +
> >   /**
> >    * A structure used to retrieve the contextual information of
> >    * an Ethernet device, such as the controlling driver of the
> > @@ -1841,8 +1865,10 @@ struct rte_eth_dev_info {
> >   	 * embedded managed interconnect/switch.
> >   	 */
> >   	struct rte_eth_switch_info switch_info;
> > +	/** IP reassembly offload capabilities that a device can support. */
> > +	struct rte_eth_ip_reass_params reass_capa;
> >
> 
> "struct rte_eth_dev_info" & 'rte_eth_dev_info_get()' are very common,
> all applications that use net devices and even some internal APIs rely on
> this struct and API.
> It makes me uneasy to extend this struct with rarely used features,
> worrying on loading to much (capability/status/config) on single
> API/struct can cause an unmaintainable code by time.
> 
> Also most of the time (if not always) offload flag is just an on/off flag
> to the PMD, application set/unset offload flag and PMD knows what to do.
> But for this case some capability variables, and a configuration API is
> required/involved.
> 
> For considering above two cases, what do you think implement this as
> control plane APIs instead of offload flag?
> There are already 'conf_set()' and 'conf_get()' APIs introduced in coming
> patches, introducing an additional 'capability_get()' API removes the need
> of change in "struct rte_eth_dev_info" and
> 'RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY'
> can be removed.
> Thomas, Andrew, what do you think?

We are ok to add a new dev_op for capability_get() if we agree on that.
Thomas, Andrew, let me know if you think otherwise.

> 
> 
> > -	uint64_t reserved_64s[2]; /**< Reserved for future fields */
> > +	uint64_t reserved_64s[1]; /**< Reserved for future fields */
> >   	void *reserved_ptrs[2];   /**< Reserved for future fields */
> >   };
> >
  
Ferruh Yigit Feb. 2, 2022, 2:05 p.m. UTC | #3
On 2/2/2022 10:57 AM, Akhil Goyal wrote:
>>> +/* Flag to offload IP reassembly for IPv4 packets. */
>>> +#define RTE_ETH_DEV_REASSEMBLY_F_IPV4 (RTE_BIT32(0))
>>> +/* Flag to offload IP reassembly for IPv6 packets. */
>>> +#define RTE_ETH_DEV_REASSEMBLY_F_IPV6 (RTE_BIT32(1))
>>> +/**
>>> + * A structure used to get/set IP reassembly configuration.
>>> + *
>>> + * If RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY flag is set in offloads field,
>>> + * the PMD will attempt IP reassembly for the received packets as per
>>> + * properties defined in this structure.
>>> + */
>>> +struct rte_eth_ip_reass_params {
>>
>> As a generic comment, what do you think to use full 'reassembly' instead
>> of 'reass' short version, to clarify/simplify the meaning?
> 
> Full reassembly was used in most places. But here the struct name would be too big.
> IMO, reass is good enough here. Though, no strong opinion. Will change if you insist.
> 

It is only that 'reass' doesn't remind me of 'reassembly' when I see it, so I think it is not clear;
we can wait for more comments from others.

>>
>>> +	/** Maximum time in ms which PMD can wait for other fragments. */
>>> +	uint32_t reass_timeout_ms;
>>> +	/** Maximum number of fragments that can be reassembled. */
>>> +	uint16_t max_frags;
>>> +	/**
>>> +	 * Flags to enable reassembly of packet types -
>>> +	 * RTE_ETH_DEV_REASSEMBLY_F_xxx.
>>> +	 */
>>> +	uint16_t flags;
>>> +};
>>> +
>>>    /**
>>>     * A structure used to retrieve the contextual information of
>>>     * an Ethernet device, such as the controlling driver of the
>>> @@ -1841,8 +1865,10 @@ struct rte_eth_dev_info {
>>>    	 * embedded managed interconnect/switch.
>>>    	 */
>>>    	struct rte_eth_switch_info switch_info;
>>> +	/** IP reassembly offload capabilities that a device can support. */
>>> +	struct rte_eth_ip_reass_params reass_capa;
>>>
>>
>> "struct rte_eth_dev_info" & 'rte_eth_dev_info_get()' are very common,
>> all applications that use net devices and even some internal APIs rely on
>> this struct and API.
>> It makes me uneasy to extend this struct with rarely used features,
>> worrying on loading to much (capability/status/config) on single
>> API/struct can cause an unmaintainable code by time.
>>
>> Also most of the time (if not always) offload flag is just an on/off flag
>> to the PMD, application set/unset offload flag and PMD knows what to do.
>> But for this case some capability variables, and a configuration API is
>> required/involved.
>>
>> For considering above two cases, what do you think implement this as
>> control plane APIs instead of offload flag?
>> There are already 'conf_set()' and 'conf_get()' APIs introduced in coming
>> patches, introducing an additional 'capability_get()' API removes the need
>> of change in "struct rte_eth_dev_info" and
>> 'RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY'
>> can be removed.
>> Thomas, Andrew, what do you think?
> 
> We are ok to add a new dev_op for capability_get() if we agree on that.
> Thomas, Andrew, let me know if you think otherwise.
> 
>>
>>
>>> -	uint64_t reserved_64s[2]; /**< Reserved for future fields */
>>> +	uint64_t reserved_64s[1]; /**< Reserved for future fields */
>>>    	void *reserved_ptrs[2];   /**< Reserved for future fields */
>>>    };
>>>
>
  

Patch

diff --git a/devtools/libabigail.abignore b/devtools/libabigail.abignore
index 4b676f317d..90f449c43a 100644
--- a/devtools/libabigail.abignore
+++ b/devtools/libabigail.abignore
@@ -11,3 +11,8 @@ 
 ; Ignore generated PMD information strings
 [suppress_variable]
         name_regexp = _pmd_info$
+
+; Ignore fields inserted in place of reserved_64s of rte_eth_dev_info
+[suppress_type]
+	name = rte_eth_dev_info
+	has_data_member_inserted_between = {offset_of(reserved_64s), end}
diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst
index 27be2d2576..b45bce4a78 100644
--- a/doc/guides/nics/features.rst
+++ b/doc/guides/nics/features.rst
@@ -602,6 +602,17 @@  Supports inner packet L4 checksum.
   ``tx_offload_capa,tx_queue_offload_capa:RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM``.
 
 
+.. _nic_features_ip_reassembly:
+
+IP reassembly
+-------------
+
+Supports IP reassembly in hardware.
+
+* **[uses]     rte_eth_rxconf,rte_eth_rxmode**: ``offloads:RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY``.
+* **[provides] rte_eth_dev_info**: ``reass_capa``.
+
+
 .. _nic_features_shared_rx_queue:
 
 Shared Rx queue
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index a1d475a292..d9a03f12f9 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -126,6 +126,7 @@  static const struct {
 	RTE_RX_OFFLOAD_BIT2STR(OUTER_UDP_CKSUM),
 	RTE_RX_OFFLOAD_BIT2STR(RSS_HASH),
 	RTE_RX_OFFLOAD_BIT2STR(BUFFER_SPLIT),
+	RTE_RX_OFFLOAD_BIT2STR(IP_REASSEMBLY),
 };
 
 #undef RTE_RX_OFFLOAD_BIT2STR
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index fa299c8ad7..cfaf7a5afc 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1586,6 +1586,7 @@  struct rte_eth_conf {
 #define RTE_ETH_RX_OFFLOAD_RSS_HASH         RTE_BIT64(19)
 #define DEV_RX_OFFLOAD_RSS_HASH             RTE_ETH_RX_OFFLOAD_RSS_HASH
 #define RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT     RTE_BIT64(20)
+#define RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY    RTE_BIT64(21)
 
 #define RTE_ETH_RX_OFFLOAD_CHECKSUM (RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | \
 				 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | \
@@ -1781,6 +1782,29 @@  enum rte_eth_representor_type {
 	RTE_ETH_REPRESENTOR_PF,   /**< representor of Physical Function. */
 };
 
+/* Flag to offload IP reassembly for IPv4 packets. */
+#define RTE_ETH_DEV_REASSEMBLY_F_IPV4 (RTE_BIT32(0))
+/* Flag to offload IP reassembly for IPv6 packets. */
+#define RTE_ETH_DEV_REASSEMBLY_F_IPV6 (RTE_BIT32(1))
+/**
+ * A structure used to get/set IP reassembly configuration.
+ *
+ * If RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY flag is set in offloads field,
+ * the PMD will attempt IP reassembly for the received packets as per
+ * properties defined in this structure.
+ */
+struct rte_eth_ip_reass_params {
+	/** Maximum time in ms which PMD can wait for other fragments. */
+	uint32_t reass_timeout_ms;
+	/** Maximum number of fragments that can be reassembled. */
+	uint16_t max_frags;
+	/**
+	 * Flags to enable reassembly of packet types -
+	 * RTE_ETH_DEV_REASSEMBLY_F_xxx.
+	 */
+	uint16_t flags;
+};
+
 /**
  * A structure used to retrieve the contextual information of
  * an Ethernet device, such as the controlling driver of the
@@ -1841,8 +1865,10 @@  struct rte_eth_dev_info {
 	 * embedded managed interconnect/switch.
 	 */
 	struct rte_eth_switch_info switch_info;
+	/** IP reassembly offload capabilities that a device can support. */
+	struct rte_eth_ip_reass_params reass_capa;
 
-	uint64_t reserved_64s[2]; /**< Reserved for future fields */
+	uint64_t reserved_64s[1]; /**< Reserved for future fields */
 	void *reserved_ptrs[2];   /**< Reserved for future fields */
 };