[v2,5/6] net/vhost: perform SW checksum in Rx path

Message ID 20220608124946.102623-6-maxime.coquelin@redhat.com (mailing list archive)
State Accepted, archived
Delegated to: Maxime Coquelin
Headers
Series Vhost checksum offload improvements |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Maxime Coquelin June 8, 2022, 12:49 p.m. UTC
  Virtio specification supports host checksum offloading
for L4, which is enabled with VIRTIO_NET_F_CSUM feature
negotiation. However, the Vhost PMD does not advertise
Rx checksum offload capabilities, so we can end up with
the VIRTIO_NET_F_CSUM feature being negotiated, implying
the Vhost library returns packets with checksum being
offloaded while the application did not request it.

Advertising these offload capabilities at the ethdev level
is not enough, because we could still end up with the
application not enabling these offloads while the guest
still negotiates them.

This patch advertises the Rx checksum offload capabilities,
and introduces a compatibility layer to cover the case
VIRTIO_NET_F_CSUM has been negotiated but the application
does not configure the Rx checksum offloads. This function
performs the L4 Rx checksum in SW for UDP and TCP. Note
that there is no need to calculate the pseudo-header
checksum, because the Virtio specification requires that
the driver do it.

This patch does not advertise SCTP checksum offloading
capability for now, but it could be handled later if the
need arises.

Reported-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 doc/guides/nics/features/vhost.ini |  1 +
 drivers/net/vhost/rte_eth_vhost.c  | 83 ++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)
  

Comments

Chenbo Xia June 9, 2022, 1:59 a.m. UTC | #1
Hi Maxime,

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Wednesday, June 8, 2022 8:50 PM
> To: dev@dpdk.org; jasowang@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>;
> david.marchand@redhat.com; Matz, Olivier <olivier.matz@6wind.com>; Ma,
> WenwuX <wenwux.ma@intel.com>; Zhang, Yuying <yuying.zhang@intel.com>;
> Singh, Aman Deep <aman.deep.singh@intel.com>
> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
> Subject: [PATCH v2 5/6] net/vhost: perform SW checksum in Rx path
> 
> Virtio specification supports host checksum offloading
> for L4, which is enabled with VIRTIO_NET_F_CSUM feature
> negotiation. However, the Vhost PMD does not advertise
> Rx checksum offload capabilities, so we can end-up with
> the VIRTIO_NET_F_CSUM feature being negotiated, implying
> the Vhost library returns packets with checksum being
> offloaded while the application did not request for it.
> 
> Advertising these offload capabilities at the ethdev level
> is not enough, because we could still end-up with the
> application not enabling these offloads while the guest
> still negotiate them.
> 
> This patch advertises the Rx checksum offload capabilities,
> and introduces a compatibility layer to cover the case
> VIRTIO_NET_F_CSUM has been negotiated but the application
> does not configure the Rx checksum offloads. This function
> performis the L4 Rx checksum in SW for UDP and TCP. Note

performs

> that it is not needed to calculate the pseudo-header
> checksum, because the Virtio specification requires that
> the driver do it.
> 
> This patch does not advertise SCTP checksum offloading
> capability for now, but it could be handled later if the
> need arises.
> 
> Reported-by: Jason Wang <jasowang@redhat.com>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  doc/guides/nics/features/vhost.ini |  1 +
>  drivers/net/vhost/rte_eth_vhost.c  | 83 ++++++++++++++++++++++++++++++
>  2 files changed, 84 insertions(+)
> 
> diff --git a/doc/guides/nics/features/vhost.ini
> b/doc/guides/nics/features/vhost.ini
> index ef81abb439..15f4dfe5e8 100644
> --- a/doc/guides/nics/features/vhost.ini
> +++ b/doc/guides/nics/features/vhost.ini
> @@ -7,6 +7,7 @@
>  Link status          = Y
>  Free Tx mbuf on demand = Y
>  Queue status event   = Y
> +L4 checksum offload  = P
>  Basic stats          = Y
>  Extended stats       = Y
>  x86-32               = Y
> diff --git a/drivers/net/vhost/rte_eth_vhost.c
> b/drivers/net/vhost/rte_eth_vhost.c
> index e931d59053..42f0d52ebc 100644
> --- a/drivers/net/vhost/rte_eth_vhost.c
> +++ b/drivers/net/vhost/rte_eth_vhost.c
> @@ -12,6 +12,7 @@
>  #include <ethdev_vdev.h>
>  #include <rte_malloc.h>
>  #include <rte_memcpy.h>
> +#include <rte_net.h>
>  #include <rte_bus_vdev.h>
>  #include <rte_kvargs.h>
>  #include <rte_vhost.h>
> @@ -85,10 +86,12 @@ struct pmd_internal {
>  	char *iface_name;
>  	uint64_t flags;
>  	uint64_t disable_flags;
> +	uint64_t features;
>  	uint16_t max_queues;
>  	int vid;
>  	rte_atomic32_t started;
>  	bool vlan_strip;
> +	bool rx_sw_csum;
>  };
> 
>  struct internal_list {
> @@ -275,6 +278,70 @@ vhost_dev_xstats_get(struct rte_eth_dev *dev, struct
> rte_eth_xstat *xstats,
>  	return nstats;
>  }
> 
> +static void
> +vhost_dev_csum_configure(struct rte_eth_dev *eth_dev)
> +{
> +	struct pmd_internal *internal = eth_dev->data->dev_private;
> +	const struct rte_eth_rxmode *rxmode = &eth_dev->data-
> >dev_conf.rxmode;
> +
> +	internal->rx_sw_csum = false;
> +
> +	/* SW checksum is not compatible with legacy mode */
> +	if (!(internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS))
> +		return;
> +
> +	if (internal->features & (1ULL << VIRTIO_NET_F_CSUM)) {
> +		if (!(rxmode->offloads &
> +				(RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
> RTE_ETH_RX_OFFLOAD_TCP_CKSUM))) {
> +			VHOST_LOG(NOTICE, "Rx csum will be done in SW, may
> impact performance.");

Missing \n

With above fixed:

Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>

> +			internal->rx_sw_csum = true;
> +		}
> +	}
> +}
> +
> +static void
> +vhost_dev_rx_sw_csum(struct rte_mbuf *mbuf)
> +{
> +	struct rte_net_hdr_lens hdr_lens;
> +	uint32_t ptype, hdr_len;
> +	uint16_t csum = 0, csum_offset;
> +
> +	/* Return early if the L4 checksum was not offloaded */
> +	if ((mbuf->ol_flags & RTE_MBUF_F_RX_L4_CKSUM_MASK) !=
> RTE_MBUF_F_RX_L4_CKSUM_NONE)
> +		return;
> +
> +	ptype = rte_net_get_ptype(mbuf, &hdr_lens, RTE_PTYPE_ALL_MASK);
> +
> +	hdr_len = hdr_lens.l2_len + hdr_lens.l3_len;
> +
> +	switch (ptype & RTE_PTYPE_L4_MASK) {
> +	case RTE_PTYPE_L4_TCP:
> +		csum_offset = offsetof(struct rte_tcp_hdr, cksum) + hdr_len;
> +		break;
> +	case RTE_PTYPE_L4_UDP:
> +		csum_offset = offsetof(struct rte_udp_hdr, dgram_cksum) +
> hdr_len;
> +		break;
> +	default:
> +		/* Unsupported packet type */
> +		return;
> +	}
> +
> +	/* The pseudo-header checksum is already performed, as per Virtio
> spec */
> +	if (rte_raw_cksum_mbuf(mbuf, hdr_len, rte_pktmbuf_pkt_len(mbuf) -
> hdr_len, &csum) < 0)
> +		return;
> +
> +	csum = ~csum;
> +	/* See RFC768 */
> +	if (unlikely((ptype & RTE_PTYPE_L4_UDP) && csum == 0))
> +		csum = 0xffff;
> +
> +	if (rte_pktmbuf_data_len(mbuf) >= csum_offset + 1)
> +		*rte_pktmbuf_mtod_offset(mbuf, uint16_t *, csum_offset) = csum;
> +
> +	mbuf->ol_flags &= ~RTE_MBUF_F_RX_L4_CKSUM_MASK;
> +	mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
> +}
> +
>  static uint16_t
>  eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>  {
> @@ -315,6 +382,9 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t
> nb_bufs)
>  		if (r->internal->vlan_strip)
>  			rte_vlan_strip(bufs[i]);
> 
> +		if (r->internal->rx_sw_csum)
> +			vhost_dev_rx_sw_csum(bufs[i]);
> +
>  		r->stats.bytes += bufs[i]->pkt_len;
>  	}
> 
> @@ -711,6 +781,11 @@ new_device(int vid)
>  		eth_dev->data->numa_node = newnode;
>  #endif
> 
> +	if (rte_vhost_get_negotiated_features(vid, &internal->features)) {
> +		VHOST_LOG(ERR, "Failed to get device features\n");
> +		return -1;
> +	}
> +
>  	internal->vid = vid;
>  	if (rte_atomic32_read(&internal->started) == 1) {
>  		queue_setup(eth_dev, internal);
> @@ -733,6 +808,8 @@ new_device(int vid)
> 
>  	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
> 
> +	vhost_dev_csum_configure(eth_dev);
> +
>  	rte_atomic32_set(&internal->dev_attached, 1);
>  	update_queuing_status(eth_dev);
> 
> @@ -1039,6 +1116,8 @@ eth_dev_configure(struct rte_eth_dev *dev)
> 
>  	internal->vlan_strip = !!(rxmode->offloads &
> RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
> 
> +	vhost_dev_csum_configure(dev);
> +
>  	return 0;
>  }
> 
> @@ -1189,6 +1268,10 @@ eth_dev_info(struct rte_eth_dev *dev,
>  	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
>  				RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
>  	dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
> +	if (internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS) {
> +		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
> +			RTE_ETH_RX_OFFLOAD_TCP_CKSUM;
> +	}
> 
>  	return 0;
>  }
> --
> 2.35.3
  
Chenbo Xia June 10, 2022, 3:49 a.m. UTC | #2
+ Cheng for review

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Wednesday, June 8, 2022 8:50 PM
> To: dev@dpdk.org; jasowang@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>;
> david.marchand@redhat.com; Matz, Olivier <olivier.matz@6wind.com>; Ma,
> WenwuX <wenwux.ma@intel.com>; Zhang, Yuying <yuying.zhang@intel.com>;
> Singh, Aman Deep <aman.deep.singh@intel.com>
> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
> Subject: [PATCH v2 5/6] net/vhost: perform SW checksum in Rx path
> 
> Virtio specification supports host checksum offloading
> for L4, which is enabled with VIRTIO_NET_F_CSUM feature
> negotiation. However, the Vhost PMD does not advertise
> Rx checksum offload capabilities, so we can end-up with
> the VIRTIO_NET_F_CSUM feature being negotiated, implying
> the Vhost library returns packets with checksum being
> offloaded while the application did not request for it.
> 
> Advertising these offload capabilities at the ethdev level
> is not enough, because we could still end-up with the
> application not enabling these offloads while the guest
> still negotiate them.
> 
> This patch advertises the Rx checksum offload capabilities,
> and introduces a compatibility layer to cover the case
> VIRTIO_NET_F_CSUM has been negotiated but the application
> does not configure the Rx checksum offloads. This function
> performis the L4 Rx checksum in SW for UDP and TCP. Note
> that it is not needed to calculate the pseudo-header
> checksum, because the Virtio specification requires that
> the driver do it.
> 
> This patch does not advertise SCTP checksum offloading
> capability for now, but it could be handled later if the
> need arises.
> 
> Reported-by: Jason Wang <jasowang@redhat.com>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  doc/guides/nics/features/vhost.ini |  1 +
>  drivers/net/vhost/rte_eth_vhost.c  | 83 ++++++++++++++++++++++++++++++
>  2 files changed, 84 insertions(+)
> 
> diff --git a/doc/guides/nics/features/vhost.ini
> b/doc/guides/nics/features/vhost.ini
> index ef81abb439..15f4dfe5e8 100644
> --- a/doc/guides/nics/features/vhost.ini
> +++ b/doc/guides/nics/features/vhost.ini
> @@ -7,6 +7,7 @@
>  Link status          = Y
>  Free Tx mbuf on demand = Y
>  Queue status event   = Y
> +L4 checksum offload  = P
>  Basic stats          = Y
>  Extended stats       = Y
>  x86-32               = Y
> diff --git a/drivers/net/vhost/rte_eth_vhost.c
> b/drivers/net/vhost/rte_eth_vhost.c
> index e931d59053..42f0d52ebc 100644
> --- a/drivers/net/vhost/rte_eth_vhost.c
> +++ b/drivers/net/vhost/rte_eth_vhost.c
> @@ -12,6 +12,7 @@
>  #include <ethdev_vdev.h>
>  #include <rte_malloc.h>
>  #include <rte_memcpy.h>
> +#include <rte_net.h>
>  #include <rte_bus_vdev.h>
>  #include <rte_kvargs.h>
>  #include <rte_vhost.h>
> @@ -85,10 +86,12 @@ struct pmd_internal {
>  	char *iface_name;
>  	uint64_t flags;
>  	uint64_t disable_flags;
> +	uint64_t features;
>  	uint16_t max_queues;
>  	int vid;
>  	rte_atomic32_t started;
>  	bool vlan_strip;
> +	bool rx_sw_csum;
>  };
> 
>  struct internal_list {
> @@ -275,6 +278,70 @@ vhost_dev_xstats_get(struct rte_eth_dev *dev, struct
> rte_eth_xstat *xstats,
>  	return nstats;
>  }
> 
> +static void
> +vhost_dev_csum_configure(struct rte_eth_dev *eth_dev)
> +{
> +	struct pmd_internal *internal = eth_dev->data->dev_private;
> +	const struct rte_eth_rxmode *rxmode = &eth_dev->data-
> >dev_conf.rxmode;
> +
> +	internal->rx_sw_csum = false;
> +
> +	/* SW checksum is not compatible with legacy mode */
> +	if (!(internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS))
> +		return;
> +
> +	if (internal->features & (1ULL << VIRTIO_NET_F_CSUM)) {
> +		if (!(rxmode->offloads &
> +				(RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
> RTE_ETH_RX_OFFLOAD_TCP_CKSUM))) {
> +			VHOST_LOG(NOTICE, "Rx csum will be done in SW, may
> impact performance.");
> +			internal->rx_sw_csum = true;
> +		}
> +	}
> +}
> +
> +static void
> +vhost_dev_rx_sw_csum(struct rte_mbuf *mbuf)
> +{
> +	struct rte_net_hdr_lens hdr_lens;
> +	uint32_t ptype, hdr_len;
> +	uint16_t csum = 0, csum_offset;
> +
> +	/* Return early if the L4 checksum was not offloaded */
> +	if ((mbuf->ol_flags & RTE_MBUF_F_RX_L4_CKSUM_MASK) !=
> RTE_MBUF_F_RX_L4_CKSUM_NONE)
> +		return;
> +
> +	ptype = rte_net_get_ptype(mbuf, &hdr_lens, RTE_PTYPE_ALL_MASK);
> +
> +	hdr_len = hdr_lens.l2_len + hdr_lens.l3_len;
> +
> +	switch (ptype & RTE_PTYPE_L4_MASK) {
> +	case RTE_PTYPE_L4_TCP:
> +		csum_offset = offsetof(struct rte_tcp_hdr, cksum) + hdr_len;
> +		break;
> +	case RTE_PTYPE_L4_UDP:
> +		csum_offset = offsetof(struct rte_udp_hdr, dgram_cksum) +
> hdr_len;
> +		break;
> +	default:
> +		/* Unsupported packet type */
> +		return;
> +	}
> +
> +	/* The pseudo-header checksum is already performed, as per Virtio
> spec */
> +	if (rte_raw_cksum_mbuf(mbuf, hdr_len, rte_pktmbuf_pkt_len(mbuf) -
> hdr_len, &csum) < 0)
> +		return;
> +
> +	csum = ~csum;
> +	/* See RFC768 */
> +	if (unlikely((ptype & RTE_PTYPE_L4_UDP) && csum == 0))
> +		csum = 0xffff;
> +
> +	if (rte_pktmbuf_data_len(mbuf) >= csum_offset + 1)
> +		*rte_pktmbuf_mtod_offset(mbuf, uint16_t *, csum_offset) = csum;
> +
> +	mbuf->ol_flags &= ~RTE_MBUF_F_RX_L4_CKSUM_MASK;
> +	mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
> +}
> +
>  static uint16_t
>  eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>  {
> @@ -315,6 +382,9 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t
> nb_bufs)
>  		if (r->internal->vlan_strip)
>  			rte_vlan_strip(bufs[i]);
> 
> +		if (r->internal->rx_sw_csum)
> +			vhost_dev_rx_sw_csum(bufs[i]);
> +
>  		r->stats.bytes += bufs[i]->pkt_len;
>  	}
> 
> @@ -711,6 +781,11 @@ new_device(int vid)
>  		eth_dev->data->numa_node = newnode;
>  #endif
> 
> +	if (rte_vhost_get_negotiated_features(vid, &internal->features)) {
> +		VHOST_LOG(ERR, "Failed to get device features\n");
> +		return -1;
> +	}
> +
>  	internal->vid = vid;
>  	if (rte_atomic32_read(&internal->started) == 1) {
>  		queue_setup(eth_dev, internal);
> @@ -733,6 +808,8 @@ new_device(int vid)
> 
>  	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
> 
> +	vhost_dev_csum_configure(eth_dev);
> +
>  	rte_atomic32_set(&internal->dev_attached, 1);
>  	update_queuing_status(eth_dev);
> 
> @@ -1039,6 +1116,8 @@ eth_dev_configure(struct rte_eth_dev *dev)
> 
>  	internal->vlan_strip = !!(rxmode->offloads &
> RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
> 
> +	vhost_dev_csum_configure(dev);
> +
>  	return 0;
>  }
> 
> @@ -1189,6 +1268,10 @@ eth_dev_info(struct rte_eth_dev *dev,
>  	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
>  				RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
>  	dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
> +	if (internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS) {
> +		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
> +			RTE_ETH_RX_OFFLOAD_TCP_CKSUM;
> +	}
> 
>  	return 0;
>  }
> --
> 2.35.3
  
Jiang, Cheng1 June 10, 2022, 7:19 a.m. UTC | #3
Hi Maxime,

> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Friday, June 10, 2022 11:49 AM
> To: Maxime Coquelin <maxime.coquelin@redhat.com>; dev@dpdk.org;
> jasowang@redhat.com; david.marchand@redhat.com; Matz, Olivier
> <olivier.matz@6wind.com>; Ma, WenwuX <wenwux.ma@intel.com>; Zhang,
> Yuying <yuying.zhang@intel.com>; Singh, Aman Deep
> <aman.deep.singh@intel.com>; Jiang, Cheng1 <cheng1.jiang@intel.com>
> Subject: RE: [PATCH v2 5/6] net/vhost: perform SW checksum in Rx path
> 
> + Cheng for review
> 
> > -----Original Message-----
> > From: Maxime Coquelin <maxime.coquelin@redhat.com>
> > Sent: Wednesday, June 8, 2022 8:50 PM
> > To: dev@dpdk.org; jasowang@redhat.com; Xia, Chenbo
> > <chenbo.xia@intel.com>; david.marchand@redhat.com; Matz, Olivier
> > <olivier.matz@6wind.com>; Ma, WenwuX <wenwux.ma@intel.com>;
> Zhang,
> > Yuying <yuying.zhang@intel.com>; Singh, Aman Deep
> > <aman.deep.singh@intel.com>
> > Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
> > Subject: [PATCH v2 5/6] net/vhost: perform SW checksum in Rx path
> >
> > Virtio specification supports host checksum offloading for L4, which
> > is enabled with VIRTIO_NET_F_CSUM feature negotiation. However, the
> > Vhost PMD does not advertise Rx checksum offload capabilities, so we
> > can end-up with the VIRTIO_NET_F_CSUM feature being negotiated,
> > implying the Vhost library returns packets with checksum being
> > offloaded while the application did not request for it.
> >
> > Advertising these offload capabilities at the ethdev level is not
> > enough, because we could still end-up with the application not
> > enabling these offloads while the guest still negotiate them.
> >
> > This patch advertises the Rx checksum offload capabilities, and
> > introduces a compatibility layer to cover the case VIRTIO_NET_F_CSUM
> > has been negotiated but the application does not configure the Rx
> > checksum offloads. This function performis the L4 Rx checksum in SW
> > for UDP and TCP. Note that it is not needed to calculate the
> > pseudo-header checksum, because the Virtio specification requires that
> > the driver do it.
> >
> > This patch does not advertise SCTP checksum offloading capability for
> > now, but it could be handled later if the need arises.
> >
> > Reported-by: Jason Wang <jasowang@redhat.com>
> > Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> > ---
> >

Reviewed-by: Cheng Jiang <cheng1.jiang@intel.com>
  

Patch

diff --git a/doc/guides/nics/features/vhost.ini b/doc/guides/nics/features/vhost.ini
index ef81abb439..15f4dfe5e8 100644
--- a/doc/guides/nics/features/vhost.ini
+++ b/doc/guides/nics/features/vhost.ini
@@ -7,6 +7,7 @@ 
 Link status          = Y
 Free Tx mbuf on demand = Y
 Queue status event   = Y
+L4 checksum offload  = P
 Basic stats          = Y
 Extended stats       = Y
 x86-32               = Y
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index e931d59053..42f0d52ebc 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -12,6 +12,7 @@ 
 #include <ethdev_vdev.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
+#include <rte_net.h>
 #include <rte_bus_vdev.h>
 #include <rte_kvargs.h>
 #include <rte_vhost.h>
@@ -85,10 +86,12 @@  struct pmd_internal {
 	char *iface_name;
 	uint64_t flags;
 	uint64_t disable_flags;
+	uint64_t features;
 	uint16_t max_queues;
 	int vid;
 	rte_atomic32_t started;
 	bool vlan_strip;
+	bool rx_sw_csum;
 };
 
 struct internal_list {
@@ -275,6 +278,70 @@  vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 	return nstats;
 }
 
+/* Use SW Rx checksum if VIRTIO_NET_F_CSUM is negotiated but no Rx L4 offload is enabled */
+static void
+vhost_dev_csum_configure(struct rte_eth_dev *eth_dev)
+{
+	struct pmd_internal *internal = eth_dev->data->dev_private;
+	const struct rte_eth_rxmode *rxmode = &eth_dev->data->dev_conf.rxmode;
+
+	internal->rx_sw_csum = false;
+	/* SW checksum is not compatible with legacy mode */
+	if (!(internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS))
+		return;
+
+	if (internal->features & (1ULL << VIRTIO_NET_F_CSUM)) {
+		if (!(rxmode->offloads &
+				(RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM))) {
+			VHOST_LOG(NOTICE, "Rx csum will be done in SW, may impact performance.\n");
+			internal->rx_sw_csum = true;
+		}
+	}
+}
+
+/* Compute in SW the L4 checksum of TCP/UDP packets flagged RTE_MBUF_F_RX_L4_CKSUM_NONE */
+static void
+vhost_dev_rx_sw_csum(struct rte_mbuf *mbuf)
+{
+	struct rte_net_hdr_lens hdr_lens;
+	uint32_t ptype, hdr_len;
+	uint16_t csum = 0, csum_offset;
+
+	/* Return early if the L4 checksum was not offloaded */
+	if ((mbuf->ol_flags & RTE_MBUF_F_RX_L4_CKSUM_MASK) != RTE_MBUF_F_RX_L4_CKSUM_NONE)
+		return;
+
+	ptype = rte_net_get_ptype(mbuf, &hdr_lens, RTE_PTYPE_ALL_MASK);
+	hdr_len = hdr_lens.l2_len + hdr_lens.l3_len;
+
+	switch (ptype & RTE_PTYPE_L4_MASK) {
+	case RTE_PTYPE_L4_TCP:
+		csum_offset = offsetof(struct rte_tcp_hdr, cksum) + hdr_len;
+		break;
+	case RTE_PTYPE_L4_UDP:
+		csum_offset = offsetof(struct rte_udp_hdr, dgram_cksum) + hdr_len;
+		break;
+	default:
+		/* Unsupported packet type */
+		return;
+	}
+
+	/* The pseudo-header checksum is already performed, as per Virtio spec */
+	if (rte_raw_cksum_mbuf(mbuf, hdr_len, rte_pktmbuf_pkt_len(mbuf) - hdr_len, &csum) < 0)
+		return;
+
+	csum = ~csum;
+	/* See RFC768: a computed UDP checksum of zero is transmitted as all ones */
+	if (unlikely((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP && csum == 0))
+		csum = 0xffff;
+	/* Store only if both bytes of the checksum field lie in the first segment */
+	if (rte_pktmbuf_data_len(mbuf) >= csum_offset + sizeof(csum))
+		*rte_pktmbuf_mtod_offset(mbuf, uint16_t *, csum_offset) = csum;
+
+	mbuf->ol_flags &= ~RTE_MBUF_F_RX_L4_CKSUM_MASK;
+	mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
+}
+
 static uint16_t
 eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
@@ -315,6 +382,9 @@  eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		if (r->internal->vlan_strip)
 			rte_vlan_strip(bufs[i]);
 
+		if (r->internal->rx_sw_csum)
+			vhost_dev_rx_sw_csum(bufs[i]);
+
 		r->stats.bytes += bufs[i]->pkt_len;
 	}
 
@@ -711,6 +781,11 @@  new_device(int vid)
 		eth_dev->data->numa_node = newnode;
 #endif
 
+	if (rte_vhost_get_negotiated_features(vid, &internal->features)) {
+		VHOST_LOG(ERR, "Failed to get device features\n");
+		return -1;
+	}
+
 	internal->vid = vid;
 	if (rte_atomic32_read(&internal->started) == 1) {
 		queue_setup(eth_dev, internal);
@@ -733,6 +808,8 @@  new_device(int vid)
 
 	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
 
+	vhost_dev_csum_configure(eth_dev);
+
 	rte_atomic32_set(&internal->dev_attached, 1);
 	update_queuing_status(eth_dev);
 
@@ -1039,6 +1116,8 @@  eth_dev_configure(struct rte_eth_dev *dev)
 
 	internal->vlan_strip = !!(rxmode->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
 
+	vhost_dev_csum_configure(dev);
+
 	return 0;
 }
 
@@ -1189,6 +1268,10 @@  eth_dev_info(struct rte_eth_dev *dev,
 	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
 				RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
 	dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
+	if (internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS) {
+		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
+			RTE_ETH_RX_OFFLOAD_TCP_CKSUM;
+	}
 
 	return 0;
 }