[dpdk-dev,v10,3/5] net: add a helper for making RARP packet

Message ID 20180110012356.57456-4-xiao.w.wang@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Yuanhan Liu
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Xiao Wang Jan. 10, 2018, 1:23 a.m. UTC
  Suggested-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_net/Makefile            |  1 +
 lib/librte_net/rte_arp.c           | 42 ++++++++++++++++++++++++++++++++++++++
 lib/librte_net/rte_arp.h           | 17 +++++++++++++++
 lib/librte_net/rte_net_version.map |  6 ++++++
 4 files changed, 66 insertions(+)
 create mode 100644 lib/librte_net/rte_arp.c
  

Comments

Yuanhan Liu Jan. 10, 2018, 1:06 p.m. UTC | #1
Thomas, look good to you?

	--yliu

On Wed, Jan 10, 2018 at 09:23:54AM +0800, Xiao Wang wrote:
> Suggested-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  lib/librte_net/Makefile            |  1 +
>  lib/librte_net/rte_arp.c           | 42 ++++++++++++++++++++++++++++++++++++++
>  lib/librte_net/rte_arp.h           | 17 +++++++++++++++
>  lib/librte_net/rte_net_version.map |  6 ++++++
>  4 files changed, 66 insertions(+)
>  create mode 100644 lib/librte_net/rte_arp.c
> 
> diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
> index 5e8a76b68..ab290c382 100644
> --- a/lib/librte_net/Makefile
> +++ b/lib/librte_net/Makefile
> @@ -13,6 +13,7 @@ LIBABIVER := 1
>  
>  SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c
>  SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_net_crc.c
> +SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_arp.c
>  
>  # install includes
>  SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_esp.h
> diff --git a/lib/librte_net/rte_arp.c b/lib/librte_net/rte_arp.c
> new file mode 100644
> index 000000000..d7223b044
> --- /dev/null
> +++ b/lib/librte_net/rte_arp.c
> @@ -0,0 +1,42 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2018 Intel Corporation
> + */
> +
> +#include <arpa/inet.h>
> +
> +#include <rte_arp.h>
> +
> +#define RARP_PKT_SIZE	64
> +int
> +rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac)
> +{
> +	struct ether_hdr *eth_hdr;
> +	struct arp_hdr *rarp;
> +
> +	if (mbuf->buf_len < RARP_PKT_SIZE)
> +		return -1;
> +
> +	/* Ethernet header. */
> +	eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
> +	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
> +	ether_addr_copy(mac, &eth_hdr->s_addr);
> +	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
> +
> +	/* RARP header. */
> +	rarp = (struct arp_hdr *)(eth_hdr + 1);
> +	rarp->arp_hrd = htons(ARP_HRD_ETHER);
> +	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
> +	rarp->arp_hln = ETHER_ADDR_LEN;
> +	rarp->arp_pln = 4;
> +	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
> +
> +	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
> +	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
> +	memset(&rarp->arp_data.arp_sip, 0x00, 4);
> +	memset(&rarp->arp_data.arp_tip, 0x00, 4);
> +
> +	mbuf->data_len = RARP_PKT_SIZE;
> +	mbuf->pkt_len = RARP_PKT_SIZE;
> +
> +	return 0;
> +}
> diff --git a/lib/librte_net/rte_arp.h b/lib/librte_net/rte_arp.h
> index 183641874..dad7423ad 100644
> --- a/lib/librte_net/rte_arp.h
> +++ b/lib/librte_net/rte_arp.h
> @@ -76,6 +76,23 @@ struct arp_hdr {
>  	struct arp_ipv4 arp_data;
>  } __attribute__((__packed__));
>  
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Make a RARP packet based on MAC addr.
> + *
> + * @param mbuf
> + *   Pointer to the rte_mbuf structure
> + * @param mac
> + *   Pointer to the MAC addr
> + *
> + * @return
> + *   - 0 on success, negative on error
> + */
> +int
> +rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/librte_net/rte_net_version.map b/lib/librte_net/rte_net_version.map
> index 687c40eaf..213e6fd32 100644
> --- a/lib/librte_net/rte_net_version.map
> +++ b/lib/librte_net/rte_net_version.map
> @@ -12,3 +12,9 @@ DPDK_17.05 {
>  	rte_net_crc_set_alg;
>  
>  } DPDK_16.11;
> +
> +EXPERIMENTAL {
> +	global:
> +
> +	rte_net_make_rarp_packet;
> +} DPDK_17.05;
> -- 
> 2.15.1
  
Thomas Monjalon Jan. 10, 2018, 2:10 p.m. UTC | #2
10/01/2018 14:06, Yuanhan Liu:
> Thomas, look good to you?

The format looks good, yes.

Cc Olivier, the maintainer.
  
Olivier Matz Jan. 16, 2018, 9:01 a.m. UTC | #3
Hi Xiao,

Please find few comments below.

On Wed, Jan 10, 2018 at 09:23:54AM +0800, Xiao Wang wrote:
> Suggested-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  lib/librte_net/Makefile            |  1 +
>  lib/librte_net/rte_arp.c           | 42 ++++++++++++++++++++++++++++++++++++++
>  lib/librte_net/rte_arp.h           | 17 +++++++++++++++
>  lib/librte_net/rte_net_version.map |  6 ++++++
>  4 files changed, 66 insertions(+)
>  create mode 100644 lib/librte_net/rte_arp.c
> 
> diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
> index 5e8a76b68..ab290c382 100644
> --- a/lib/librte_net/Makefile
> +++ b/lib/librte_net/Makefile
> @@ -13,6 +13,7 @@ LIBABIVER := 1
>  
>  SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c
>  SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_net_crc.c
> +SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_arp.c
>  
>  # install includes
>  SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_esp.h
> diff --git a/lib/librte_net/rte_arp.c b/lib/librte_net/rte_arp.c
> new file mode 100644
> index 000000000..d7223b044
> --- /dev/null
> +++ b/lib/librte_net/rte_arp.c
> @@ -0,0 +1,42 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2018 Intel Corporation
> + */
> +
> +#include <arpa/inet.h>
> +
> +#include <rte_arp.h>
> +
> +#define RARP_PKT_SIZE	64
> +int
> +rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac)
> +{
> +	struct ether_hdr *eth_hdr;
> +	struct arp_hdr *rarp;
> +
> +	if (mbuf->buf_len < RARP_PKT_SIZE)
> +		return -1;
> +
> +	/* Ethernet header. */
> +	eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
> +	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
> +	ether_addr_copy(mac, &eth_hdr->s_addr);
> +	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
> +
> +	/* RARP header. */
> +	rarp = (struct arp_hdr *)(eth_hdr + 1);
> +	rarp->arp_hrd = htons(ARP_HRD_ETHER);
> +	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
> +	rarp->arp_hln = ETHER_ADDR_LEN;
> +	rarp->arp_pln = 4;
> +	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
> +
> +	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
> +	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
> +	memset(&rarp->arp_data.arp_sip, 0x00, 4);
> +	memset(&rarp->arp_data.arp_tip, 0x00, 4);
> +
> +	mbuf->data_len = RARP_PKT_SIZE;
> +	mbuf->pkt_len = RARP_PKT_SIZE;
> +
> +	return 0;
> +}

You don't check that there is enough tailroom to write the packet data.
Also, nothing verifies that the mbuf passed to the function is empty.
I suggest to do the allocation in this function, what do you think?

You can also use rte_pktmbuf_append() to check for the tailroom and
update data_len/pkt_len:

	m = rte_pktmbuf_alloc();
	if (m == NULL)
		return NULL;
	eth_hdr = rte_pktmbuf_append(m, RARP_PKT_SIZE);
	if (eth_hdr == NULL) {
		m_freem(m);
		return NULL;
	}
	eth_hdr->... = ...;
	...
	rarp = (struct arp_hdr *)(eth_hdr + 1);
	rarp->... = ...;
	...

	return m;
  
Xiao Wang Jan. 16, 2018, 9:43 a.m. UTC | #4
Hi Olivier,

> -----Original Message-----
> From: Olivier Matz [mailto:olivier.matz@6wind.com]
> Sent: Tuesday, January 16, 2018 5:01 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: yliu@fridaylinux.org; thomas@monjalon.net; Bie, Tiwei
> <tiwei.bie@intel.com>; dev@dpdk.org; stephen@networkplumber.org;
> maxime.coquelin@redhat.com
> Subject: Re: [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP
> packet
> 
> Hi Xiao,
> 
> Please find few comments below.
> 
> On Wed, Jan 10, 2018 at 09:23:54AM +0800, Xiao Wang wrote:
> > Suggested-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> > Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> > ---
> >  lib/librte_net/Makefile            |  1 +
> >  lib/librte_net/rte_arp.c           | 42
> ++++++++++++++++++++++++++++++++++++++
> >  lib/librte_net/rte_arp.h           | 17 +++++++++++++++
> >  lib/librte_net/rte_net_version.map |  6 ++++++
> >  4 files changed, 66 insertions(+)
> >  create mode 100644 lib/librte_net/rte_arp.c
> >
> > diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
> > index 5e8a76b68..ab290c382 100644
> > --- a/lib/librte_net/Makefile
> > +++ b/lib/librte_net/Makefile
> > @@ -13,6 +13,7 @@ LIBABIVER := 1
> >
> >  SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c
> >  SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_net_crc.c
> > +SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_arp.c
> >
> >  # install includes
> >  SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h
> rte_udp.h rte_esp.h
> > diff --git a/lib/librte_net/rte_arp.c b/lib/librte_net/rte_arp.c
> > new file mode 100644
> > index 000000000..d7223b044
> > --- /dev/null
> > +++ b/lib/librte_net/rte_arp.c
> > @@ -0,0 +1,42 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2018 Intel Corporation
> > + */
> > +
> > +#include <arpa/inet.h>
> > +
> > +#include <rte_arp.h>
> > +
> > +#define RARP_PKT_SIZE	64
> > +int
> > +rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr
> *mac)
> > +{
> > +	struct ether_hdr *eth_hdr;
> > +	struct arp_hdr *rarp;
> > +
> > +	if (mbuf->buf_len < RARP_PKT_SIZE)
> > +		return -1;
> > +
> > +	/* Ethernet header. */
> > +	eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
> > +	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
> > +	ether_addr_copy(mac, &eth_hdr->s_addr);
> > +	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
> > +
> > +	/* RARP header. */
> > +	rarp = (struct arp_hdr *)(eth_hdr + 1);
> > +	rarp->arp_hrd = htons(ARP_HRD_ETHER);
> > +	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
> > +	rarp->arp_hln = ETHER_ADDR_LEN;
> > +	rarp->arp_pln = 4;
> > +	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
> > +
> > +	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
> > +	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
> > +	memset(&rarp->arp_data.arp_sip, 0x00, 4);
> > +	memset(&rarp->arp_data.arp_tip, 0x00, 4);
> > +
> > +	mbuf->data_len = RARP_PKT_SIZE;
> > +	mbuf->pkt_len = RARP_PKT_SIZE;
> > +
> > +	return 0;
> > +}
> 
> You don't check that there is enough tailroom to write the packet data.

Yes, tailroom can be used.

> Also, nothing verifies that the mbuf passed to the function is empty.
> I suggest to do the allocation in this function, what do you think?
>

I agree to allocate in this function and let it do all the checks.
 
> You can also use rte_pktmbuf_append() to check for the tailroom and
> update data_len/pkt_len:
> 
> 	m = rte_pktmbuf_alloc();
> 	if (m == NULL)
> 		return NULL;
> 	eth_hdr = rte_pktmbuf_append(m, RARP_PKT_SIZE);

When data_len is not enough, we need to rte_pktmbuf_append(m, RARP_PKT_SIZE - m->data_len);

> 	if (eth_hdr == NULL) {
> 		m_freem(m);
> 		return NULL;
> 	}
> 	eth_hdr->... = ...;
> 	...
> 	rarp = (struct arp_hdr *)(eth_hdr + 1);
> 	rarp->... = ...;
> 	...
> 
> 	return m;
> 

Will change it in next version, thanks for the comments.

BRs,
Xiao
  
Olivier Matz Jan. 16, 2018, 10:42 a.m. UTC | #5
Hi Xiao,

On Tue, Jan 16, 2018 at 09:43:43AM +0000, Wang, Xiao W wrote:
> Hi Olivier,
> > You can also use rte_pktmbuf_append() to check for the tailroom and
> > update data_len/pkt_len:
> > 
> > 	m = rte_pktmbuf_alloc();
> > 	if (m == NULL)
> > 		return NULL;
> > 	eth_hdr = rte_pktmbuf_append(m, RARP_PKT_SIZE);
> 
> When data_len is not enough, we need to rte_pktmbuf_append(m, RARP_PKT_SIZE - m->data_len);

Sorry, I don't get your point here.
  
Xiao Wang Jan. 16, 2018, 11:03 a.m. UTC | #6
Hi Olivier,

> -----Original Message-----
> From: Olivier Matz [mailto:olivier.matz@6wind.com]
> Sent: Tuesday, January 16, 2018 6:43 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: yliu@fridaylinux.org; thomas@monjalon.net; Bie, Tiwei
> <tiwei.bie@intel.com>; dev@dpdk.org; stephen@networkplumber.org;
> maxime.coquelin@redhat.com
> Subject: Re: [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP
> packet
> 
> Hi Xiao,
> 
> On Tue, Jan 16, 2018 at 09:43:43AM +0000, Wang, Xiao W wrote:
> > Hi Olivier,
> > > You can also use rte_pktmbuf_append() to check for the tailroom and
> > > update data_len/pkt_len:
> > >
> > > 	m = rte_pktmbuf_alloc();

I just realized that if we let this function to allocate mbuf, it may restrict this api's applicability.
E.g. the caller just has a mbuf, without a mempool.
How do you think?

> > > 	if (m == NULL)
> > > 		return NULL;
> > > 	eth_hdr = rte_pktmbuf_append(m, RARP_PKT_SIZE);
> >
> > When data_len is not enough, we need to rte_pktmbuf_append(m,
> RARP_PKT_SIZE - m->data_len);
> 
> Sorry, I don't get your point here.

I mean we just need to extend the data_len by "RARP_PKT_SIZE - m->data_len" when the room is not big enough.

BRs,
Xiao
  
Xiao Wang Jan. 16, 2018, 11:42 a.m. UTC | #7
> -----Original Message-----
> From: Wang, Xiao W
> Sent: Tuesday, January 16, 2018 7:03 PM
> To: 'Olivier Matz' <olivier.matz@6wind.com>
> Cc: yliu@fridaylinux.org; thomas@monjalon.net; Bie, Tiwei
> <tiwei.bie@intel.com>; dev@dpdk.org; stephen@networkplumber.org;
> maxime.coquelin@redhat.com
> Subject: RE: [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP
> packet
> 
> Hi Olivier,
> 
> > -----Original Message-----
> > From: Olivier Matz [mailto:olivier.matz@6wind.com]
> > Sent: Tuesday, January 16, 2018 6:43 PM
> > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > Cc: yliu@fridaylinux.org; thomas@monjalon.net; Bie, Tiwei
> > <tiwei.bie@intel.com>; dev@dpdk.org; stephen@networkplumber.org;
> > maxime.coquelin@redhat.com
> > Subject: Re: [dpdk-dev] [PATCH v10 3/5] net: add a helper for making RARP
> > packet
> >
> > Hi Xiao,
> >
> > On Tue, Jan 16, 2018 at 09:43:43AM +0000, Wang, Xiao W wrote:
> > > Hi Olivier,
> > > > You can also use rte_pktmbuf_append() to check for the tailroom and
> > > > update data_len/pkt_len:
> > > >
> > > > 	m = rte_pktmbuf_alloc();
> 
> I just realized that if we let this function to allocate mbuf, it may restrict this
> api's applicability.
> E.g. the caller just has a mbuf, without a mempool.
> How do you think?
> 
> > > > 	if (m == NULL)
> > > > 		return NULL;
> > > > 	eth_hdr = rte_pktmbuf_append(m, RARP_PKT_SIZE);
> > >
> > > When data_len is not enough, we need to rte_pktmbuf_append(m,
> > RARP_PKT_SIZE - m->data_len);
> >
> > Sorry, I don't get your point here.
> 
> I mean we just need to extend the data_len by "RARP_PKT_SIZE - m-
> >data_len" when the room is not big enough.

OK, in your sample code, you rte_pktmbuf_alloc() a mbuf, it's reset already, so we just append RARP_PKT_SIZE. I got you~

For the mbuf allocation, we can let this function do allocation and content filling. If the app needs special need, e.g. chained mbuf,
then let the app fill it by itself.

> 
> BRs,
> Xiao
  
Xiao Wang Jan. 16, 2018, 9:40 p.m. UTC | #8
When live migration is finished, the backup VM needs to proactively announce
its new location. DPDK vhost has implemented VHOST_USER_PROTOCOL_F_RARP to
generate a RARP packet to switch in dequeue path. Another method is to let
the guest proactively send out RARP packet using VIRTIO_NET_F_GUEST_ANNOUNCE
feature.

This patch set enables this feature in virtio pmd, to support VM running virtio
pmd be migrated without vhost supporting RARP generation.

v11:
- Add check for parameter and tailroom in rte_net_make_rarp_packet.
- Allocate mbuf in rte_net_make_rarp_packet.

v10:
- Add a bold doxygen comment for the experimental function.

v9:
- Introduce function with the experimental state.

v8:
- Add a helper in lib/librte_net to make rarp packet, it's used by
  both vhost and virtio.

v7:
- Improve comment for state_lock.
- Rename spinlock variable 'sl' to 'lock'.

v6:
- Use rte_pktmbuf_alloc() instead of rte_mbuf_raw_alloc().
- Remove the 'len' parameter in calling virtio_send_command().
- Remove extra space between typo and var.
- Improve comment and alignment.
- Remove the unnecessary header file.
- A better usage of 'unlikely' indication.

v5:
- Remove txvq parameter in virtio_inject_pkts.
- Zero hw->special_buf after using it.
- Return the retval of tx_pkt_burst().
- Allocate a mbuf pointer on stack directly.

v4:
- Move spinlock lock/unlock into dev_pause/resume.
- Separate out a patch for packet injection.

v3:
- Remove Tx function code duplication, use a special pointer for rarp
  injection.
- Rename function generate_rarp to virtio_notify_peers, replace
  'virtnet_' with 'virtio_'.
- Add comment for state_lock.
- Typo fix and comment improvement.

v2:
- Use spaces instead of tabs between the code and comments.
- Remove unnecessary parentheses.
- Use rte_pktmbuf_mtod directly to get eth_hdr addr.
- Fix virtio_dev_pause return value check.

Xiao Wang (5):
  net/virtio: make control queue thread-safe
  net/virtio: add packet injection method
  net: add a helper for making RARP packet
  vhost: use lib API to make RARP packet
  net/virtio: support GUEST ANNOUNCE

 drivers/net/virtio/virtio_ethdev.c      | 113 +++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_ethdev.h      |   6 ++
 drivers/net/virtio/virtio_pci.h         |   7 ++
 drivers/net/virtio/virtio_rxtx.c        |   3 +-
 drivers/net/virtio/virtio_rxtx.h        |   1 +
 drivers/net/virtio/virtio_rxtx_simple.c |   2 +-
 drivers/net/virtio/virtqueue.h          |  11 ++++
 lib/Makefile                            |   3 +-
 lib/librte_net/Makefile                 |   1 +
 lib/librte_net/rte_arp.c                |  50 ++++++++++++++
 lib/librte_net/rte_arp.h                |  18 +++++
 lib/librte_net/rte_net_version.map      |   6 ++
 lib/librte_vhost/Makefile               |   2 +-
 lib/librte_vhost/virtio_net.c           |  51 +-------------
 14 files changed, 219 insertions(+), 55 deletions(-)
 create mode 100644 lib/librte_net/rte_arp.c
  
Yuanhan Liu Jan. 18, 2018, 3:09 a.m. UTC | #9
Xiao told me that this series (except the last patch) was already applied
to the Thomas master branch. I then realised it was my mistake.

I applied v10 last week locally for some basic testing. There is a conflict
in last patch, that's why the last patch is not merged. I forgot to do
a reset before I applied another patch. Then later, I did a push to the
next-virtio tree, thus patches from Xiao were also pushed. Ferruh then
did a pull from it. As a result, they got merged to the master branch
before I realised. Non-rebase is allowed there, thus I have made a patch
to fix my mistake.

Meanwhile, I have also spotted a build error when shared lib is enabled.
I will send them out soon.

	--yliu

On Wed, Jan 17, 2018 at 05:40:58AM +0800, Xiao Wang wrote:
> When live migration is finished, the backup VM needs to proactively announce
> its new location. DPDK vhost has implemented VHOST_USER_PROTOCOL_F_RARP to
> generate a RARP packet to switch in dequeue path. Another method is to let
> the guest proactively send out RARP packet using VIRTIO_NET_F_GUEST_ANNOUNCE
> feature.
> 
> This patch set enables this feature in virtio pmd, to support VM running virtio
> pmd be migrated without vhost supporting RARP generation.
> 
> v11:
> - Add check for parameter and tailroom in rte_net_make_rarp_packet.
> - Allocate mbuf in rte_net_make_rarp_packet.
> 
> v10:
> - Add a bold doxygen comment for the experimental function.
> 
> v9:
> - Introduce function with the experimental state.
> 
> v8:
> - Add a helper in lib/librte_net to make rarp packet, it's used by
>   both vhost and virtio.
> 
> v7:
> - Improve comment for state_lock.
> - Rename spinlock variable 'sl' to 'lock'.
> 
> v6:
> - Use rte_pktmbuf_alloc() instead of rte_mbuf_raw_alloc().
> - Remove the 'len' parameter in calling virtio_send_command().
> - Remove extra space between typo and var.
> - Improve comment and alignment.
> - Remove the unnecessary header file.
> - A better usage of 'unlikely' indication.
> 
> v5:
> - Remove txvq parameter in virtio_inject_pkts.
> - Zero hw->special_buf after using it.
> - Return the retval of tx_pkt_burst().
> - Allocate a mbuf pointer on stack directly.
> 
> v4:
> - Move spinlock lock/unlock into dev_pause/resume.
> - Separate out a patch for packet injection.
> 
> v3:
> - Remove Tx function code duplication, use a special pointer for rarp
>   injection.
> - Rename function generate_rarp to virtio_notify_peers, replace
>   'virtnet_' with 'virtio_'.
> - Add comment for state_lock.
> - Typo fix and comment improvement.
> 
> v2:
> - Use spaces instead of tabs between the code and comments.
> - Remove unnecessary parentheses.
> - Use rte_pktmbuf_mtod directly to get eth_hdr addr.
> - Fix virtio_dev_pause return value check.
> 
> Xiao Wang (5):
>   net/virtio: make control queue thread-safe
>   net/virtio: add packet injection method
>   net: add a helper for making RARP packet
>   vhost: use lib API to make RARP packet
>   net/virtio: support GUEST ANNOUNCE
> 
>  drivers/net/virtio/virtio_ethdev.c      | 113 +++++++++++++++++++++++++++++++-
>  drivers/net/virtio/virtio_ethdev.h      |   6 ++
>  drivers/net/virtio/virtio_pci.h         |   7 ++
>  drivers/net/virtio/virtio_rxtx.c        |   3 +-
>  drivers/net/virtio/virtio_rxtx.h        |   1 +
>  drivers/net/virtio/virtio_rxtx_simple.c |   2 +-
>  drivers/net/virtio/virtqueue.h          |  11 ++++
>  lib/Makefile                            |   3 +-
>  lib/librte_net/Makefile                 |   1 +
>  lib/librte_net/rte_arp.c                |  50 ++++++++++++++
>  lib/librte_net/rte_arp.h                |  18 +++++
>  lib/librte_net/rte_net_version.map      |   6 ++
>  lib/librte_vhost/Makefile               |   2 +-
>  lib/librte_vhost/virtio_net.c           |  51 +-------------
>  14 files changed, 219 insertions(+), 55 deletions(-)
>  create mode 100644 lib/librte_net/rte_arp.c
> 
> -- 
> 2.15.1
  

Patch

diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
index 5e8a76b68..ab290c382 100644
--- a/lib/librte_net/Makefile
+++ b/lib/librte_net/Makefile
@@ -13,6 +13,7 @@  LIBABIVER := 1
 
 SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c
 SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_net_crc.c
+SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_arp.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_esp.h
diff --git a/lib/librte_net/rte_arp.c b/lib/librte_net/rte_arp.c
new file mode 100644
index 000000000..d7223b044
--- /dev/null
+++ b/lib/librte_net/rte_arp.c
@@ -0,0 +1,42 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <arpa/inet.h>
+
+#include <rte_arp.h>
+
+#define RARP_PKT_SIZE	64
+int
+rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac)
+{
+	struct ether_hdr *eth_hdr;
+	struct arp_hdr *rarp;
+
+	if (mbuf->buf_len < RARP_PKT_SIZE)
+		return -1;
+
+	/* Ethernet header. */
+	eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
+	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
+	ether_addr_copy(mac, &eth_hdr->s_addr);
+	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
+
+	/* RARP header. */
+	rarp = (struct arp_hdr *)(eth_hdr + 1);
+	rarp->arp_hrd = htons(ARP_HRD_ETHER);
+	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
+	rarp->arp_hln = ETHER_ADDR_LEN;
+	rarp->arp_pln = 4;
+	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
+
+	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
+	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
+	memset(&rarp->arp_data.arp_sip, 0x00, 4);
+	memset(&rarp->arp_data.arp_tip, 0x00, 4);
+
+	mbuf->data_len = RARP_PKT_SIZE;
+	mbuf->pkt_len = RARP_PKT_SIZE;
+
+	return 0;
+}
diff --git a/lib/librte_net/rte_arp.h b/lib/librte_net/rte_arp.h
index 183641874..dad7423ad 100644
--- a/lib/librte_net/rte_arp.h
+++ b/lib/librte_net/rte_arp.h
@@ -76,6 +76,23 @@  struct arp_hdr {
 	struct arp_ipv4 arp_data;
 } __attribute__((__packed__));
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Make a RARP packet based on MAC addr.
+ *
+ * @param mbuf
+ *   Pointer to the rte_mbuf structure
+ * @param mac
+ *   Pointer to the MAC addr
+ *
+ * @return
+ *   - 0 on success, negative on error
+ */
+int
+rte_net_make_rarp_packet(struct rte_mbuf *mbuf, const struct ether_addr *mac);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_net/rte_net_version.map b/lib/librte_net/rte_net_version.map
index 687c40eaf..213e6fd32 100644
--- a/lib/librte_net/rte_net_version.map
+++ b/lib/librte_net/rte_net_version.map
@@ -12,3 +12,9 @@  DPDK_17.05 {
 	rte_net_crc_set_alg;
 
 } DPDK_16.11;
+
+EXPERIMENTAL {
+	global:
+
+	rte_net_make_rarp_packet;
+} DPDK_17.05;