[v3] app/testpmd: fix l4 sw csum over multi segments

Message ID 20211020101243.203063-1-xiaoyun.li@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series [v3] app/testpmd: fix l4 sw csum over multi segments |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/github-robot: build success github build: passed
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-x86_64-unit-testing fail Testing issues
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS

Commit Message

Li, Xiaoyun Oct. 20, 2021, 10:12 a.m. UTC
In csum forwarding mode, the software UDP/TCP checksum calculation only
takes the first segment into account while using the whole packet length,
so with multi-segment packets the calculation reads an invalid memory
region and produces a wrong value.
This patch fixes the issue by computing the checksum across all segments.

Fixes: af75078fece3 ("first public release")
Cc: stable@dpdk.org

Signed-off-by: Xiaoyun Li <xiaoyun.li@intel.com>
---
v3:
 * Use rte_raw_cksum() for the multi-segment case instead of copying the
   whole packet.
v2:
 * Use static stack memory instead of dynamic allocation in the datapath
---
 app/test-pmd/csumonly.c | 68 ++++++++++++++++++++++++++++++++---------
 1 file changed, 53 insertions(+), 15 deletions(-)
  

Comments

Ferruh Yigit Oct. 27, 2021, 10:48 a.m. UTC | #1
On 10/20/2021 11:12 AM, Xiaoyun Li wrote:
> In csum forwarding mode, software UDP/TCP csum calculation only takes
> the first segment into account while using the whole packet length so
> the calculation will read invalid memory region with multi-segments
> packets and will get wrong value.
> This patch fixes this issue.
> 
> Fixes: af75078fece3 ("first public release")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Xiaoyun Li <xiaoyun.li@intel.com>
> ---
> v3:
>   * Use rte_raw_cksum() for multi-segs case instead of copying the whole
>   * packet.
> v2:
>   * Use static stack memory instead of dynamic allocating in datapath
> ---
>   app/test-pmd/csumonly.c | 68 ++++++++++++++++++++++++++++++++---------
>   1 file changed, 53 insertions(+), 15 deletions(-)
> 
> diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
> index 090797318a..f3e60eb3c3 100644
> --- a/app/test-pmd/csumonly.c
> +++ b/app/test-pmd/csumonly.c
> @@ -91,12 +91,41 @@ struct simple_gre_hdr {
>   } __rte_packed;
>   
>   static uint16_t
> -get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
> +get_udptcp_checksum(void *l3_hdr, struct rte_mbuf *m, uint16_t l4_off,
> +		    uint16_t ethertype)
>   {
> +	uint16_t off = l4_off;
> +	uint32_t cksum = 0;
> +	char *buf;
> +
> +	while (m != NULL) {
> +		buf = rte_pktmbuf_mtod_offset(m, char *, off);
> +		cksum += rte_raw_cksum(buf, m->data_len - off);
> +		off = 0;
> +		m = m->next;
> +	}
>   	if (ethertype == _htons(RTE_ETHER_TYPE_IPV4))
> -		return rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
> +		cksum += rte_ipv4_phdr_cksum(l3_hdr, 0);
>   	else /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
> -		return rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
> +		cksum += rte_ipv6_phdr_cksum(l3_hdr, 0);
> +

Hi Xiaoyun,

I can see that 'rte_ipv[46]_udptcp_cksum()' does not take multi-segment
mbufs into account, so this fix is required.
But instead of implementing this logic in testpmd, what do you think
about adding APIs that support multi-segment mbufs?
This way other applications would also benefit from it, and we would not
need to maintain IPv4/IPv6 checksum-related code in testpmd.

btw, how are you testing this?
  
Morten Brørup Oct. 27, 2021, 11:29 a.m. UTC | #2
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ferruh Yigit
> Sent: Wednesday, 27 October 2021 12.49
> 
> On 10/20/2021 11:12 AM, Xiaoyun Li wrote:
> > In csum forwarding mode, software UDP/TCP csum calculation only takes
> > the first segment into account while using the whole packet length so
> > the calculation will read invalid memory region with multi-segments
> > packets and will get wrong value.
> > This patch fixes this issue.
> >
> > Fixes: af75078fece3 ("first public release")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Xiaoyun Li <xiaoyun.li@intel.com>
> > ---
> > v3:
> >   * Use rte_raw_cksum() for multi-segs case instead of copying the
> whole
> >   * packet.
> > v2:
> >   * Use static stack memory instead of dynamic allocating in datapath
> > ---
> >   app/test-pmd/csumonly.c | 68 ++++++++++++++++++++++++++++++++------
> ---
> >   1 file changed, 53 insertions(+), 15 deletions(-)
> >
> > diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
> > index 090797318a..f3e60eb3c3 100644
> > --- a/app/test-pmd/csumonly.c
> > +++ b/app/test-pmd/csumonly.c
> > @@ -91,12 +91,41 @@ struct simple_gre_hdr {
> >   } __rte_packed;
> >
> >   static uint16_t
> > -get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
> > +get_udptcp_checksum(void *l3_hdr, struct rte_mbuf *m, uint16_t
> l4_off,
> > +		    uint16_t ethertype)
> >   {
> > +	uint16_t off = l4_off;
> > +	uint32_t cksum = 0;
> > +	char *buf;
> > +
> > +	while (m != NULL) {
> > +		buf = rte_pktmbuf_mtod_offset(m, char *, off);
> > +		cksum += rte_raw_cksum(buf, m->data_len - off);
> > +		off = 0;
> > +		m = m->next;
> > +	}
> >   	if (ethertype == _htons(RTE_ETHER_TYPE_IPV4))
> > -		return rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
> > +		cksum += rte_ipv4_phdr_cksum(l3_hdr, 0);
> >   	else /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
> > -		return rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
> > +		cksum += rte_ipv6_phdr_cksum(l3_hdr, 0);
> > +
> 
> Hi Xiaoyun,
> 
> I can see 'rte_ipv[46]_udptcp_cksum()' is not taking multi segment mbuf
> into account, so this fix is required,
> but instead of implementing this logic into testpmd, what do you think
> to have APIs to support multi segment mbufs?
> This way other applications also benefit from it and we don't need to
> maintain ip4/6 checksum related code in testpmd.

+1

Also, there is no need to implement the multi-segment raw checksum loop in test-pmd.

You can use the multi-segment raw checksum function in the net library instead:
http://code.dpdk.org/dpdk/latest/source/lib/net/rte_ip.h#L224

> 
> btw, how are you testing this?
  
Olivier Matz Oct. 29, 2021, 8:29 a.m. UTC | #3
On Wed, Oct 27, 2021 at 01:29:52PM +0200, Morten Brørup wrote:
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ferruh Yigit
> > Sent: Wednesday, 27 October 2021 12.49
> > 
> > On 10/20/2021 11:12 AM, Xiaoyun Li wrote:
> > > In csum forwarding mode, software UDP/TCP csum calculation only takes
> > > the first segment into account while using the whole packet length so
> > > the calculation will read invalid memory region with multi-segments
> > > packets and will get wrong value.
> > > This patch fixes this issue.
> > >
> > > Fixes: af75078fece3 ("first public release")
> > > Cc: stable@dpdk.org
> > >
> > > Signed-off-by: Xiaoyun Li <xiaoyun.li@intel.com>
> > > ---
> > > v3:
> > >   * Use rte_raw_cksum() for multi-segs case instead of copying the
> > whole
> > >   * packet.
> > > v2:
> > >   * Use static stack memory instead of dynamic allocating in datapath
> > > ---
> > >   app/test-pmd/csumonly.c | 68 ++++++++++++++++++++++++++++++++------
> > ---
> > >   1 file changed, 53 insertions(+), 15 deletions(-)
> > >
> > > diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
> > > index 090797318a..f3e60eb3c3 100644
> > > --- a/app/test-pmd/csumonly.c
> > > +++ b/app/test-pmd/csumonly.c
> > > @@ -91,12 +91,41 @@ struct simple_gre_hdr {
> > >   } __rte_packed;
> > >
> > >   static uint16_t
> > > -get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
> > > +get_udptcp_checksum(void *l3_hdr, struct rte_mbuf *m, uint16_t
> > l4_off,
> > > +		    uint16_t ethertype)
> > >   {
> > > +	uint16_t off = l4_off;
> > > +	uint32_t cksum = 0;
> > > +	char *buf;
> > > +
> > > +	while (m != NULL) {
> > > +		buf = rte_pktmbuf_mtod_offset(m, char *, off);
> > > +		cksum += rte_raw_cksum(buf, m->data_len - off);
> > > +		off = 0;
> > > +		m = m->next;
> > > +	}
> > >   	if (ethertype == _htons(RTE_ETHER_TYPE_IPV4))
> > > -		return rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
> > > +		cksum += rte_ipv4_phdr_cksum(l3_hdr, 0);
> > >   	else /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
> > > -		return rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
> > > +		cksum += rte_ipv6_phdr_cksum(l3_hdr, 0);
> > > +
> > 
> > Hi Xiaoyun,
> > 
> > I can see 'rte_ipv[46]_udptcp_cksum()' is not taking multi segment mbuf
> > into account, so this fix is required,
> > but instead of implementing this logic into testpmd, what do you think
> > to have APIs to support multi segment mbufs?
> > This way other applications also benefit from it and we don't need to
> > maintain ip4/6 checksum related code in testpmd.
> 
> +1
> 
> Also, there is no need to implement the multi-segment raw checksum loop in test-pmd.
> 
> You can use the multi-segment raw checksum function in the net library instead:
> http://code.dpdk.org/dpdk/latest/source/lib/net/rte_ip.h#L224

+1

We can have mbuf variants of udptcp checksum functions:

rte_ipv4_udptcp_cksum()
rte_ipv4_udptcp_cksum_verify()
rte_ipv6_udptcp_cksum()
rte_ipv6_udptcp_cksum_verify()

Adding a "_mbuf" suffix would be consistent with rte_raw_cksum_mbuf().


Olivier
  
Li, Xiaoyun Dec. 3, 2021, 11:31 a.m. UTC | #4
Hi

> -----Original Message-----
> From: Olivier Matz <olivier.matz@6wind.com>
> Sent: Friday, October 29, 2021 09:29
> To: Morten Brørup <mb@smartsharesystems.com>
> Cc: Yigit, Ferruh <ferruh.yigit@intel.com>; Li, Xiaoyun <xiaoyun.li@intel.com>;
> Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> stephen@networkplumber.org; dev@dpdk.org; stable@dpdk.org;
> Medvedkin, Vladimir <vladimir.medvedkin@intel.com>
> Subject: Re: [dpdk-dev] [PATCH v3] app/testpmd: fix l4 sw csum over multi
> segments
> 
> On Wed, Oct 27, 2021 at 01:29:52PM +0200, Morten Brørup wrote:
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ferruh Yigit
> > > Sent: Wednesday, 27 October 2021 12.49
> > >
> > > On 10/20/2021 11:12 AM, Xiaoyun Li wrote:
> > > > In csum forwarding mode, software UDP/TCP csum calculation only
> > > > takes the first segment into account while using the whole packet
> > > > length so the calculation will read invalid memory region with
> > > > multi-segments packets and will get wrong value.
> > > > This patch fixes this issue.
> > > >
> > > > Fixes: af75078fece3 ("first public release")
> > > > Cc: stable@dpdk.org
> > > >
> > > > Signed-off-by: Xiaoyun Li <xiaoyun.li@intel.com>
> > > > ---
> > > > v3:
> > > >   * Use rte_raw_cksum() for multi-segs case instead of copying the
> > > whole
> > > >   * packet.
> > > > v2:
> > > >   * Use static stack memory instead of dynamic allocating in
> > > > datapath
> > > > ---
> > > >   app/test-pmd/csumonly.c | 68
> > > > ++++++++++++++++++++++++++++++++------
> > > ---
> > > >   1 file changed, 53 insertions(+), 15 deletions(-)
> > > >
> > > > diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
> > > > index 090797318a..f3e60eb3c3 100644
> > > > --- a/app/test-pmd/csumonly.c
> > > > +++ b/app/test-pmd/csumonly.c
> > > > @@ -91,12 +91,41 @@ struct simple_gre_hdr {
> > > >   } __rte_packed;
> > > >
> > > >   static uint16_t
> > > > -get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t
> > > > ethertype)
> > > > +get_udptcp_checksum(void *l3_hdr, struct rte_mbuf *m, uint16_t
> > > l4_off,
> > > > +		    uint16_t ethertype)
> > > >   {
> > > > +	uint16_t off = l4_off;
> > > > +	uint32_t cksum = 0;
> > > > +	char *buf;
> > > > +
> > > > +	while (m != NULL) {
> > > > +		buf = rte_pktmbuf_mtod_offset(m, char *, off);
> > > > +		cksum += rte_raw_cksum(buf, m->data_len - off);
> > > > +		off = 0;
> > > > +		m = m->next;
> > > > +	}
> > > >   	if (ethertype == _htons(RTE_ETHER_TYPE_IPV4))
> > > > -		return rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
> > > > +		cksum += rte_ipv4_phdr_cksum(l3_hdr, 0);
> > > >   	else /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
> > > > -		return rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
> > > > +		cksum += rte_ipv6_phdr_cksum(l3_hdr, 0);
> > > > +
> > >
> > > Hi Xiaoyun,
> > >
> > > I can see 'rte_ipv[46]_udptcp_cksum()' is not taking multi segment
> > > mbuf into account, so this fix is required, but instead of
> > > implementing this logic into testpmd, what do you think to have APIs
> > > to support multi segment mbufs?
> > > This way other applications also benefit from it and we don't need
> > > to maintain ip4/6 checksum related code in testpmd.
> >
> > +1
> >
> > Also, there is no need to implement the multi-segment raw checksum loop
> in test-pmd.
> >
> > You can use the multi-segment raw checksum function in the net library
> instead:
> > http://code.dpdk.org/dpdk/latest/source/lib/net/rte_ip.h#L224
> 
> +1
> 
> We can have mbuf variants of udptcp checksum functions:
> 
> rte_ipv4_udptcp_cksum()
> rte_ipv4_udptcp_cksum_verify()
> rte_ipv6_udptcp_cksum()
> rte_ipv6_udptcp_cksum_verify()
> 
> Adding a "_mbuf" suffix would be consistent with rte_raw_cksum_mbuf().

Thanks for the suggestion. Since it's an API change, I'll send these in the 22.03 release with a release note.
V4 will come soon.

> 
> 
> Olivier
--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.
  

Patch

diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 090797318a..f3e60eb3c3 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -91,12 +91,41 @@  struct simple_gre_hdr {
 } __rte_packed;
 
 static uint16_t
-get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
+get_udptcp_checksum(void *l3_hdr, struct rte_mbuf *m, uint16_t l4_off,
+		    uint16_t ethertype)
 {
+	uint16_t off = l4_off;
+	uint32_t cksum = 0;
+	char *buf;
+
+	while (m != NULL) {
+		buf = rte_pktmbuf_mtod_offset(m, char *, off);
+		cksum += rte_raw_cksum(buf, m->data_len - off);
+		off = 0;
+		m = m->next;
+	}
 	if (ethertype == _htons(RTE_ETHER_TYPE_IPV4))
-		return rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
+		cksum += rte_ipv4_phdr_cksum(l3_hdr, 0);
 	else /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
-		return rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
+		cksum += rte_ipv6_phdr_cksum(l3_hdr, 0);
+
+	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
+	cksum = (~cksum) & 0xffff;
+
+	/*
+	 * Per RFC 768: If the computed checksum is zero for UDP,
+	 * it is transmitted as all ones
+	 * (the equivalent in one's complement arithmetic).
+	 */
+	if (cksum == 0 && ethertype == _htons(RTE_ETHER_TYPE_IPV4) &&
+	    ((struct rte_ipv4_hdr *)l3_hdr)->next_proto_id == IPPROTO_UDP)
+		cksum = 0xffff;
+
+	if (cksum == 0 && ethertype == _htons(RTE_ETHER_TYPE_IPV6) &&
+	    ((struct rte_ipv6_hdr *)l3_hdr)->proto == IPPROTO_UDP)
+		cksum = 0xffff;
+
+	return (uint16_t)cksum;
 }
 
 /* Parse an IPv4 header to fill l3_len, l4_len, and l4_proto */
@@ -455,7 +484,7 @@  parse_encap_ip(void *encap_ip, struct testpmd_offload_info *info)
  * depending on the testpmd command line configuration */
 static uint64_t
 process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
-	uint64_t tx_offloads)
+	uint64_t tx_offloads, struct rte_mbuf *m)
 {
 	struct rte_ipv4_hdr *ipv4_hdr = l3_hdr;
 	struct rte_udp_hdr *udp_hdr;
@@ -463,6 +492,7 @@  process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
 	struct rte_sctp_hdr *sctp_hdr;
 	uint64_t ol_flags = 0;
 	uint32_t max_pkt_len, tso_segsz = 0;
+	uint16_t l4_off;
 
 	/* ensure packet is large enough to require tso */
 	if (!info->is_tunnel) {
@@ -505,9 +535,15 @@  process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
 			if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM) {
 				ol_flags |= PKT_TX_UDP_CKSUM;
 			} else {
+				if (info->is_tunnel)
+					l4_off = info->l2_len +
+						info->outer_l3_len +
+						info->l2_len + info->l3_len;
+				else
+					l4_off = info->l2_len +	info->l3_len;
 				udp_hdr->dgram_cksum = 0;
 				udp_hdr->dgram_cksum =
-					get_udptcp_checksum(l3_hdr, udp_hdr,
+					get_udptcp_checksum(l3_hdr, m, l4_off,
 						info->ethertype);
 			}
 		}
@@ -520,9 +556,14 @@  process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
 		else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM) {
 			ol_flags |= PKT_TX_TCP_CKSUM;
 		} else {
+			if (info->is_tunnel)
+				l4_off = info->l2_len + info->outer_l3_len +
+					info->l2_len + info->l3_len;
+			else
+				l4_off = info->l2_len + info->l3_len;
 			tcp_hdr->cksum = 0;
 			tcp_hdr->cksum =
-				get_udptcp_checksum(l3_hdr, tcp_hdr,
+				get_udptcp_checksum(l3_hdr, m, l4_off,
 					info->ethertype);
 		}
 		if (info->gso_enable)
@@ -548,7 +589,7 @@  process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
 /* Calculate the checksum of outer header */
 static uint64_t
 process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info,
-	uint64_t tx_offloads, int tso_enabled)
+	uint64_t tx_offloads, int tso_enabled, struct rte_mbuf *m)
 {
 	struct rte_ipv4_hdr *ipv4_hdr = outer_l3_hdr;
 	struct rte_ipv6_hdr *ipv6_hdr = outer_l3_hdr;
@@ -602,12 +643,9 @@  process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info,
 	/* do not recalculate udp cksum if it was 0 */
 	if (udp_hdr->dgram_cksum != 0) {
 		udp_hdr->dgram_cksum = 0;
-		if (info->outer_ethertype == _htons(RTE_ETHER_TYPE_IPV4))
-			udp_hdr->dgram_cksum =
-				rte_ipv4_udptcp_cksum(ipv4_hdr, udp_hdr);
-		else
-			udp_hdr->dgram_cksum =
-				rte_ipv6_udptcp_cksum(ipv6_hdr, udp_hdr);
+		udp_hdr->dgram_cksum = get_udptcp_checksum(outer_l3_hdr,
+					m, info->l2_len + info->outer_l3_len,
+					info->outer_ethertype);
 	}
 
 	return ol_flags;
@@ -942,7 +980,7 @@  pkt_burst_checksum_forward(struct fwd_stream *fs)
 
 		/* process checksums of inner headers first */
 		tx_ol_flags |= process_inner_cksums(l3_hdr, &info,
-			tx_offloads);
+			tx_offloads, m);
 
 		/* Then process outer headers if any. Note that the software
 		 * checksum will be wrong if one of the inner checksums is
@@ -950,7 +988,7 @@  pkt_burst_checksum_forward(struct fwd_stream *fs)
 		if (info.is_tunnel == 1) {
 			tx_ol_flags |= process_outer_cksums(outer_l3_hdr, &info,
 					tx_offloads,
-					!!(tx_ol_flags & PKT_TX_TCP_SEG));
+					!!(tx_ol_flags & PKT_TX_TCP_SEG), m);
 		}
 
 		/* step 3: fill the mbuf meta data (flags and header lengths) */