[1/8] ethdev: introduce IP reassembly offload

Message ID 20220103150813.1694888-2-gakhil@marvell.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series ethdev: introduce IP reassembly offload |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Akhil Goyal Jan. 3, 2022, 3:08 p.m. UTC
  IP Reassembly is a costly operation if it is done in software.
The operation becomes even more costlier if IP fragmants are encrypted.
However, if it is offloaded to HW, it can considerably save application cycles.

Hence, a new offload RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY is introduced in
ethdev for devices which can attempt reassembly of packets in hardware.
rte_eth_dev_info is updated with the reassembly capabilities which a device
can support.

The resulting reassembled packet would be a typical segmented mbuf in
case of success.

And if reassembly of fragments is failed or is incomplete (if fragments do
not come before the reass_timeout), the mbuf ol_flags can be updated.
This is updated in a subsequent patch.

Signed-off-by: Akhil Goyal <gakhil@marvell.com>
---
 doc/guides/nics/features.rst | 12 ++++++++++++
 lib/ethdev/rte_ethdev.c      |  1 +
 lib/ethdev/rte_ethdev.h      | 32 +++++++++++++++++++++++++++++++-
 3 files changed, 44 insertions(+), 1 deletion(-)
  

Comments

Ananyev, Konstantin Jan. 11, 2022, 4:03 p.m. UTC | #1
> IP Reassembly is a costly operation if it is done in software.
> The operation becomes even more costlier if IP fragmants are encrypted.
> However, if it is offloaded to HW, it can considerably save application cycles.
> 
> Hence, a new offload RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY is introduced in
> ethdev for devices which can attempt reassembly of packets in hardware.
> rte_eth_dev_info is updated with the reassembly capabilities which a device
> can support.
> 
> The resulting reassembled packet would be a typical segmented mbuf in
> case of success.
> 
> And if reassembly of fragments is failed or is incomplete (if fragments do
> not come before the reass_timeout), the mbuf ol_flags can be updated.
> This is updated in a subsequent patch.
> 
> Signed-off-by: Akhil Goyal <gakhil@marvell.com>
> ---
>  doc/guides/nics/features.rst | 12 ++++++++++++
>  lib/ethdev/rte_ethdev.c      |  1 +
>  lib/ethdev/rte_ethdev.h      | 32 +++++++++++++++++++++++++++++++-
>  3 files changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst
> index 27be2d2576..1dfdee9602 100644
> --- a/doc/guides/nics/features.rst
> +++ b/doc/guides/nics/features.rst
> @@ -602,6 +602,18 @@ Supports inner packet L4 checksum.
>    ``tx_offload_capa,tx_queue_offload_capa:RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM``.
> 
> 
> +.. _nic_features_ip_reassembly:
> +
> +IP reassembly
> +-------------
> +
> +Supports IP reassembly in hardware.
> +
> +* **[uses]     rte_eth_rxconf,rte_eth_rxmode**: ``offloads:RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY``.
> +* **[provides] mbuf**: ``mbuf.ol_flags:RTE_MBUF_F_RX_IP_REASSEMBLY_INCOMPLETE``.
> +* **[provides] rte_eth_dev_info**: ``reass_capa``.
> +
> +
>  .. _nic_features_shared_rx_queue:
> 
>  Shared Rx queue
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index a1d475a292..d9a03f12f9 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -126,6 +126,7 @@ static const struct {
>  	RTE_RX_OFFLOAD_BIT2STR(OUTER_UDP_CKSUM),
>  	RTE_RX_OFFLOAD_BIT2STR(RSS_HASH),
>  	RTE_RX_OFFLOAD_BIT2STR(BUFFER_SPLIT),
> +	RTE_RX_OFFLOAD_BIT2STR(IP_REASSEMBLY),
>  };
> 
>  #undef RTE_RX_OFFLOAD_BIT2STR
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index fa299c8ad7..11427b2e4d 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -1586,6 +1586,7 @@ struct rte_eth_conf {
>  #define RTE_ETH_RX_OFFLOAD_RSS_HASH         RTE_BIT64(19)
>  #define DEV_RX_OFFLOAD_RSS_HASH             RTE_ETH_RX_OFFLOAD_RSS_HASH
>  #define RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT     RTE_BIT64(20)
> +#define RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY    RTE_BIT64(21)
> 
>  #define RTE_ETH_RX_OFFLOAD_CHECKSUM (RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | \
>  				 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | \
> @@ -1781,6 +1782,33 @@ enum rte_eth_representor_type {
>  	RTE_ETH_REPRESENTOR_PF,   /**< representor of Physical Function. */
>  };
> 
> +/* Flag to offload IP reassembly for IPv4 packets. */
> +#define RTE_ETH_DEV_REASSEMBLY_F_IPV4 (RTE_BIT32(0))
> +/* Flag to offload IP reassembly for IPv6 packets. */
> +#define RTE_ETH_DEV_REASSEMBLY_F_IPV6 (RTE_BIT32(1))
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this structure may change without prior notice.
> + *
> + * A structure used to set IP reassembly configuration.
> + *
> + * If RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY flag is set in offloads field,
> + * the PMD will attempt IP reassembly for the received packets as per
> + * properties defined in this structure:
> + *
> + */
> +struct rte_eth_ip_reass_params {
> +	/** Maximum time in ms which PMD can wait for other fragments. */
> +	uint32_t reass_timeout;
> +	/** Maximum number of fragments that can be reassembled. */
> +	uint16_t max_frags;
> +	/**
> +	 * Flags to enable reassembly of packet types -
> +	 * RTE_ETH_DEV_REASSEMBLY_F_xxx.
> +	 */
> +	uint16_t flags;
> +};
> +
>  /**
>   * A structure used to retrieve the contextual information of
>   * an Ethernet device, such as the controlling driver of the
> @@ -1841,8 +1869,10 @@ struct rte_eth_dev_info {
>  	 * embedded managed interconnect/switch.
>  	 */
>  	struct rte_eth_switch_info switch_info;
> +	/** IP reassembly offload capabilities that a device can support. */
> +	struct rte_eth_ip_reass_params reass_capa;
> 
> -	uint64_t reserved_64s[2]; /**< Reserved for future fields */
> +	uint64_t reserved_64s[1]; /**< Reserved for future fields */
>  	void *reserved_ptrs[2];   /**< Reserved for future fields */
>  };
> 
> --

Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

> 2.25.1
  
Andrew Rybchenko Jan. 22, 2022, 7:38 a.m. UTC | #2
On 1/3/22 18:08, Akhil Goyal wrote:
> IP Reassembly is a costly operation if it is done in software.
> The operation becomes even more costlier if IP fragmants are encrypted.
> However, if it is offloaded to HW, it can considerably save application cycles.
> 
> Hence, a new offload RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY is introduced in
> ethdev for devices which can attempt reassembly of packets in hardware.
> rte_eth_dev_info is updated with the reassembly capabilities which a device
> can support.

Yes, reassembly is really complicated process taking possibility to have
overlapping fragments, out-of-order etc.
There are network attacks based on IP reassembly.
Will it simply result in IP reassembly failure if no buffers are left
for IP fragments? What will be reported in mbuf if some packets overlap?
Just raw packets as is or reassembly result with holes?
I think behavour should be specified/

> The resulting reassembled packet would be a typical segmented mbuf in
> case of success.
> 
> And if reassembly of fragments is failed or is incomplete (if fragments do
> not come before the reass_timeout), the mbuf ol_flags can be updated.
> This is updated in a subsequent patch.
> 
> Signed-off-by: Akhil Goyal <gakhil@marvell.com>
> ---
>   doc/guides/nics/features.rst | 12 ++++++++++++
>   lib/ethdev/rte_ethdev.c      |  1 +
>   lib/ethdev/rte_ethdev.h      | 32 +++++++++++++++++++++++++++++++-
>   3 files changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst
> index 27be2d2576..1dfdee9602 100644
> --- a/doc/guides/nics/features.rst
> +++ b/doc/guides/nics/features.rst
> @@ -602,6 +602,18 @@ Supports inner packet L4 checksum.
>     ``tx_offload_capa,tx_queue_offload_capa:RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM``.
>   
>   
> +.. _nic_features_ip_reassembly:
> +
> +IP reassembly
> +-------------
> +
> +Supports IP reassembly in hardware.
> +
> +* **[uses]     rte_eth_rxconf,rte_eth_rxmode**: ``offloads:RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY``.

Looking at the patch I see no changes and usage of rte_eth_rxconf and
rte_eth_rxmode. It should be added here later if corresponding changes
come in subsequent patches.

> +* **[provides] mbuf**: ``mbuf.ol_flags:RTE_MBUF_F_RX_IP_REASSEMBLY_INCOMPLETE``

Same here. The flag is not defined yet. So, it must not be mentioned in
the patch.
.
> +* **[provides] rte_eth_dev_info**: ``reass_capa``.
> +
> +
>   .. _nic_features_shared_rx_queue:
>   
>   Shared Rx queue


> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index fa299c8ad7..11427b2e4d 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h

[snip]

> @@ -1781,6 +1782,33 @@ enum rte_eth_representor_type {
>   	RTE_ETH_REPRESENTOR_PF,   /**< representor of Physical Function. */
>   };
>   
> +/* Flag to offload IP reassembly for IPv4 packets. */
> +#define RTE_ETH_DEV_REASSEMBLY_F_IPV4 (RTE_BIT32(0))
> +/* Flag to offload IP reassembly for IPv6 packets. */
> +#define RTE_ETH_DEV_REASSEMBLY_F_IPV6 (RTE_BIT32(1))
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this structure may change without prior notice.
> + *
> + * A structure used to set IP reassembly configuration.

In the patch the structure is used to provide capabilities,
not to set configuration.

If you are going to use the same structure in capabilities and
configuration, it could be handy, but really confusing since
interpretation of fields should be different.
As a bare minimum the difference must be specified in comments.
Right now all fields makes sense in capabilities and configuration:
maximum possible vs actual value, however, not everything could be
really configurable and it will become confusing. It is really hard
to discuss right now since the patch does not provide usage of the
structure for the configuration.

> + *
> + * If RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY flag is set in offloads field,
> + * the PMD will attempt IP reassembly for the received packets as per
> + * properties defined in this structure:
> + *
> + */
> +struct rte_eth_ip_reass_params {
> +	/** Maximum time in ms which PMD can wait for other fragments. */
> +	uint32_t reass_timeout;

Please, specify units. May be even in field name. E.g. reass_timeout_ms.

> +	/** Maximum number of fragments that can be reassembled. */
> +	uint16_t max_frags;
> +	/**
> +	 * Flags to enable reassembly of packet types -
> +	 * RTE_ETH_DEV_REASSEMBLY_F_xxx.
> +	 */
> +	uint16_t flags;

If it is just for packet types, I'd suggest to name the field more
precise. Also it will avoid flags vs frags misreading.
Just an idea. Up to you.

> +};
> +
>   /**
>    * A structure used to retrieve the contextual information of
>    * an Ethernet device, such as the controlling driver of the
> @@ -1841,8 +1869,10 @@ struct rte_eth_dev_info {
>   	 * embedded managed interconnect/switch.
>   	 */
>   	struct rte_eth_switch_info switch_info;
> +	/** IP reassembly offload capabilities that a device can support. */
> +	struct rte_eth_ip_reass_params reass_capa;
>   
> -	uint64_t reserved_64s[2]; /**< Reserved for future fields */
> +	uint64_t reserved_64s[1]; /**< Reserved for future fields */
>   	void *reserved_ptrs[2];   /**< Reserved for future fields */
>   };
>
  
Akhil Goyal Jan. 30, 2022, 4:53 p.m. UTC | #3
Hi Andrew,
Thanks for the review.
> On 1/3/22 18:08, Akhil Goyal wrote:
> > IP Reassembly is a costly operation if it is done in software.
> > The operation becomes even more costlier if IP fragmants are encrypted.
> > However, if it is offloaded to HW, it can considerably save application cycles.
> >
> > Hence, a new offload RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY is introduced
> in
> > ethdev for devices which can attempt reassembly of packets in hardware.
> > rte_eth_dev_info is updated with the reassembly capabilities which a device
> > can support.
> 
> Yes, reassembly is really complicated process taking possibility to have
> overlapping fragments, out-of-order etc.
> There are network attacks based on IP reassembly.
> Will it simply result in IP reassembly failure if no buffers are left
> for IP fragments? What will be reported in mbuf if some packets overlap?
> Just raw packets as is or reassembly result with holes?
> I think behavour should be specified/

The PMD will set the reassembly incomplete dynflag and the user can retrieve the
Mbufs using dynfields. This is shown in v2 of the patchset and a test app is also added
To cover this negative case.

> 
> > The resulting reassembled packet would be a typical segmented mbuf in
> > case of success.
> >
> > And if reassembly of fragments is failed or is incomplete (if fragments do
> > not come before the reass_timeout), the mbuf ol_flags can be updated.
> > This is updated in a subsequent patch.
> >
> > Signed-off-by: Akhil Goyal <gakhil@marvell.com>
> > ---
> >   doc/guides/nics/features.rst | 12 ++++++++++++
> >   lib/ethdev/rte_ethdev.c      |  1 +
> >   lib/ethdev/rte_ethdev.h      | 32 +++++++++++++++++++++++++++++++-
> >   3 files changed, 44 insertions(+), 1 deletion(-)
> >
> > diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst
> > index 27be2d2576..1dfdee9602 100644
> > --- a/doc/guides/nics/features.rst
> > +++ b/doc/guides/nics/features.rst
> > @@ -602,6 +602,18 @@ Supports inner packet L4 checksum.
> >
> ``tx_offload_capa,tx_queue_offload_capa:RTE_ETH_TX_OFFLOAD_OUTER_UD
> P_CKSUM``.
> >
> >
> > +.. _nic_features_ip_reassembly:
> > +
> > +IP reassembly
> > +-------------
> > +
> > +Supports IP reassembly in hardware.
> > +
> > +* **[uses]     rte_eth_rxconf,rte_eth_rxmode**:
> ``offloads:RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY``.
> 
> Looking at the patch I see no changes and usage of rte_eth_rxconf and
> rte_eth_rxmode. It should be added here later if corresponding changes
> come in subsequent patches.
> 
> > +* **[provides] mbuf**:
> ``mbuf.ol_flags:RTE_MBUF_F_RX_IP_REASSEMBLY_INCOMPLETE``
> 
> Same here. The flag is not defined yet. So, it must not be mentioned in
> the patch.
> .
Ok

> > +* **[provides] rte_eth_dev_info**: ``reass_capa``.
> > +
> > +
> >   .. _nic_features_shared_rx_queue:
> >
> >   Shared Rx queue
> 
> 
> > diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> > index fa299c8ad7..11427b2e4d 100644
> > --- a/lib/ethdev/rte_ethdev.h
> > +++ b/lib/ethdev/rte_ethdev.h
> 
> [snip]
> 
> > @@ -1781,6 +1782,33 @@ enum rte_eth_representor_type {
> >   	RTE_ETH_REPRESENTOR_PF,   /**< representor of Physical Function. */
> >   };
> >
> > +/* Flag to offload IP reassembly for IPv4 packets. */
> > +#define RTE_ETH_DEV_REASSEMBLY_F_IPV4 (RTE_BIT32(0))
> > +/* Flag to offload IP reassembly for IPv6 packets. */
> > +#define RTE_ETH_DEV_REASSEMBLY_F_IPV6 (RTE_BIT32(1))
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this structure may change without prior notice.
> > + *
> > + * A structure used to set IP reassembly configuration.
> 
> In the patch the structure is used to provide capabilities,
> not to set configuration.
> 
> If you are going to use the same structure in capabilities and
> configuration, it could be handy, but really confusing since
> interpretation of fields should be different.
> As a bare minimum the difference must be specified in comments.
> Right now all fields makes sense in capabilities and configuration:
> maximum possible vs actual value, however, not everything could be
> really configurable and it will become confusing. It is really hard
> to discuss right now since the patch does not provide usage of the
> structure for the configuration.

The idea is to use it both for capabilities as well as configuration of those values.
And from library perspective not creating any limitation about which param can
Be configured which cannot be configured.
However will add a comment to specify both usage.
Could you please check v2 and test app for the usage?

> 
> > + *
> > + * If RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY flag is set in offloads field,
> > + * the PMD will attempt IP reassembly for the received packets as per
> > + * properties defined in this structure:
> > + *
> > + */
> > +struct rte_eth_ip_reass_params {
> > +	/** Maximum time in ms which PMD can wait for other fragments. */
> > +	uint32_t reass_timeout;
> 
> Please, specify units. May be even in field name. E.g. reass_timeout_ms.
Ok


> 
> > +	/** Maximum number of fragments that can be reassembled. */
> > +	uint16_t max_frags;
> > +	/**
> > +	 * Flags to enable reassembly of packet types -
> > +	 * RTE_ETH_DEV_REASSEMBLY_F_xxx.
> > +	 */
> > +	uint16_t flags;
> 
> If it is just for packet types, I'd suggest to name the field more
> precise. Also it will avoid flags vs frags misreading.
> Just an idea. Up to you.

Currently it is for packet type, but it can be extended for further usage in future.
Hence thought of making it generic - flags.
What do you suggest? How about renaming it to 'options'?

Regards,
Akhil
  

Patch

diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst
index 27be2d2576..1dfdee9602 100644
--- a/doc/guides/nics/features.rst
+++ b/doc/guides/nics/features.rst
@@ -602,6 +602,18 @@  Supports inner packet L4 checksum.
   ``tx_offload_capa,tx_queue_offload_capa:RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM``.
 
 
+.. _nic_features_ip_reassembly:
+
+IP reassembly
+-------------
+
+Supports IP reassembly in hardware.
+
+* **[uses]     rte_eth_rxconf,rte_eth_rxmode**: ``offloads:RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY``.
+* **[provides] mbuf**: ``mbuf.ol_flags:RTE_MBUF_F_RX_IP_REASSEMBLY_INCOMPLETE``.
+* **[provides] rte_eth_dev_info**: ``reass_capa``.
+
+
 .. _nic_features_shared_rx_queue:
 
 Shared Rx queue
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index a1d475a292..d9a03f12f9 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -126,6 +126,7 @@  static const struct {
 	RTE_RX_OFFLOAD_BIT2STR(OUTER_UDP_CKSUM),
 	RTE_RX_OFFLOAD_BIT2STR(RSS_HASH),
 	RTE_RX_OFFLOAD_BIT2STR(BUFFER_SPLIT),
+	RTE_RX_OFFLOAD_BIT2STR(IP_REASSEMBLY),
 };
 
 #undef RTE_RX_OFFLOAD_BIT2STR
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index fa299c8ad7..11427b2e4d 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1586,6 +1586,7 @@  struct rte_eth_conf {
 #define RTE_ETH_RX_OFFLOAD_RSS_HASH         RTE_BIT64(19)
 #define DEV_RX_OFFLOAD_RSS_HASH             RTE_ETH_RX_OFFLOAD_RSS_HASH
 #define RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT     RTE_BIT64(20)
+#define RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY    RTE_BIT64(21)
 
 #define RTE_ETH_RX_OFFLOAD_CHECKSUM (RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | \
 				 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | \
@@ -1781,6 +1782,33 @@  enum rte_eth_representor_type {
 	RTE_ETH_REPRESENTOR_PF,   /**< representor of Physical Function. */
 };
 
+/* Flag to offload IP reassembly for IPv4 packets. */
+#define RTE_ETH_DEV_REASSEMBLY_F_IPV4 (RTE_BIT32(0))
+/* Flag to offload IP reassembly for IPv6 packets. */
+#define RTE_ETH_DEV_REASSEMBLY_F_IPV6 (RTE_BIT32(1))
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice.
+ *
+ * A structure used to set IP reassembly configuration.
+ *
+ * If RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY flag is set in offloads field,
+ * the PMD will attempt IP reassembly for the received packets as per
+ * properties defined in this structure:
+ *
+ */
+struct rte_eth_ip_reass_params {
+	/** Maximum time in ms which PMD can wait for other fragments. */
+	uint32_t reass_timeout;
+	/** Maximum number of fragments that can be reassembled. */
+	uint16_t max_frags;
+	/**
+	 * Flags to enable reassembly of packet types -
+	 * RTE_ETH_DEV_REASSEMBLY_F_xxx.
+	 */
+	uint16_t flags;
+};
+
 /**
  * A structure used to retrieve the contextual information of
  * an Ethernet device, such as the controlling driver of the
@@ -1841,8 +1869,10 @@  struct rte_eth_dev_info {
 	 * embedded managed interconnect/switch.
 	 */
 	struct rte_eth_switch_info switch_info;
+	/** IP reassembly offload capabilities that a device can support. */
+	struct rte_eth_ip_reass_params reass_capa;
 
-	uint64_t reserved_64s[2]; /**< Reserved for future fields */
+	uint64_t reserved_64s[1]; /**< Reserved for future fields */
 	void *reserved_ptrs[2];   /**< Reserved for future fields */
 };