[v5,4/5] mbuf: add a pktmbuf copy routine

Message ID 20191007154343.8556-5-stephen@networkplumber.org (mailing list archive)
State Superseded, archived
Headers
Series mbuf copy/cloning enhancements |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Stephen Hemminger Oct. 7, 2019, 3:43 p.m. UTC
  This is a commonly used operation that surprisingly the
DPDK has not supported. The new rte_pktmbuf_copy does a
deep copy of packet. This is a complete copy including
meta-data.

It handles the case where the source mbuf comes from a pool
with larger data area than the destination pool. The routine
also has options for skipping data, or truncating at a fixed
length.

This patch also introduces internal inline to copy the
metadata fields of mbuf.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/librte_mbuf/rte_mbuf.c           | 69 ++++++++++++++++++++++++++++
 lib/librte_mbuf/rte_mbuf.h           | 53 +++++++++++++++++----
 lib/librte_mbuf/rte_mbuf_version.map |  1 +
 3 files changed, 113 insertions(+), 10 deletions(-)
  

Comments

Olivier Matz Oct. 8, 2019, 9:03 a.m. UTC | #1
Hi Stephen,

Thank you for this patch. Few comments below.

On Mon, Oct 07, 2019 at 08:43:42AM -0700, Stephen Hemminger wrote:
> This is a commonly used operation that surprisingly the
> DPDK has not supported. The new rte_pktmbuf_copy does a
> deep copy of packet. This is a complete copy including
> meta-data.
> 
> It handles the case where the source mbuf comes from a pool
> with larger data area than the destination pool. The routine
> also has options for skipping data, or truncating at a fixed
> length.
> 
> This patch also introduces internal inline to copy the
> metadata fields of mbuf.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>  lib/librte_mbuf/rte_mbuf.c           | 69 ++++++++++++++++++++++++++++
>  lib/librte_mbuf/rte_mbuf.h           | 53 +++++++++++++++++----
>  lib/librte_mbuf/rte_mbuf_version.map |  1 +
>  3 files changed, 113 insertions(+), 10 deletions(-)
> 
> diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
> index 9a1a1b5f9468..d5c9488fa213 100644
> --- a/lib/librte_mbuf/rte_mbuf.c
> +++ b/lib/librte_mbuf/rte_mbuf.c
> @@ -321,6 +321,75 @@ __rte_pktmbuf_linearize(struct rte_mbuf *mbuf)
>  	return 0;
>  }
>  
> +/* Create a deep copy of mbuf */
> +struct rte_mbuf *
> +rte_pktmbuf_copy(const struct rte_mbuf *m, struct rte_mempool *mp,
> +		 uint32_t off, uint32_t len)
> +{
> +	const struct rte_mbuf *seg = m;
> +	struct rte_mbuf *mc, *m_last, **prev;
> +	uint16_t priv_size;
> +
> +	if (unlikely(off >= m->pkt_len))
> +		return NULL;
> +
> +	mc = rte_pktmbuf_alloc(mp);
> +	if (unlikely(mc == NULL))
> +		return NULL;
> +
> +	if (len > m->pkt_len - off)
> +		len = m->pkt_len - off;
> +
> +	__rte_pktmbuf_copy_hdr(mc, m);

I think ol_flags should be copied too.
Take care to not copy the IND_ATTACHED_MBUF bit.

> +
> +	/* copy private area (can be 0) */
> +	priv_size = RTE_MIN(rte_pktmbuf_priv_size(mp),
> +			    rte_pktmbuf_priv_size(m->pool));
> +	rte_memcpy(mc + 1, m + 1, priv_size);

I'm not sure doing the copy on the MIN() length is a good solution.
Nothing states that the priv area of different mbuf pools are compatible.

I suggest to drop the priv copy, like it's done for the clone. An
application can provide its own functions for copy and clone, and can
adapt the priv copy to its needs.

An alternative is to allocate from the same pool than m, but I prefer
the first solution.

> +
> +	prev = &mc->next;
> +	m_last = mc;
> +	while (len > 0) {
> +		uint32_t copy_len;
> +
> +		while (off >= seg->data_len) {
> +			off -= seg->data_len;
> +			seg = seg->next;
> +		}
> +
> +		/* current buffer is full, chain a new one */
> +		if (rte_pktmbuf_tailroom(m_last) == 0) {
> +			m_last = rte_pktmbuf_alloc(mp);
> +			if (unlikely(m_last == NULL)) {
> +				rte_pktmbuf_free(mc);
> +				return NULL;
> +			}

For segments 2 to n, the headroom is useless. An optimization could
be to remove the headroom here.

> +			++mc->nb_segs;
> +			*prev = m_last;
> +			prev = &m_last->next;
> +		}
> +
> +		copy_len = RTE_MIN(seg->data_len - off, len);
> +		if (copy_len > rte_pktmbuf_tailroom(m_last))
> +			copy_len = rte_pktmbuf_tailroom(m_last);
> +		/* append from seg to m_last */
> +		rte_memcpy(rte_pktmbuf_mtod_offset(m_last, char *,
> +						   m_last->data_len),
> +			   rte_pktmbuf_mtod_offset(seg, char *,
> +						   off),
> +			   copy_len);
> +
> +		m_last->data_len += copy_len;
> +		mc->pkt_len += copy_len;
> +		off += copy_len;
> +		len -= copy_len;
> +	}
> +
> +	__rte_mbuf_sanity_check(mc, 1);
> +	return mc;
> +}
> +
>  /* dump a mbuf on console */
>  void
>  rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index 6133f12172ae..803ddf28496e 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -1684,6 +1684,19 @@ rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
>   */
>  #define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
>  
> +/* internal */
> +static inline void
> +__rte_pktmbuf_copy_hdr(struct rte_mbuf *mi, const struct rte_mbuf *m)
> +{
> +	mi->port = m->port;
> +	mi->vlan_tci = m->vlan_tci;
> +	mi->vlan_tci_outer = m->vlan_tci_outer;
> +	mi->tx_offload = m->tx_offload;
> +	mi->hash = m->hash;
> +	mi->packet_type = m->packet_type;
> +	mi->timestamp = m->timestamp;
> +}
> +

The names "dst" and "src" looks clearer than "mi" (for indirect mbuf)
and "m", knowing that this function can deal with 2 direct mbufs.


>  /**
>   * Attach packet mbuf to another packet mbuf.
>   *
> @@ -1721,23 +1734,17 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
>  		mi->ol_flags = m->ol_flags | IND_ATTACHED_MBUF;
>  	}
>  
> -	mi->buf_iova = m->buf_iova;
> -	mi->buf_addr = m->buf_addr;
> -	mi->buf_len = m->buf_len;
> +	__rte_pktmbuf_copy_hdr(mi, m);
>  
>  	mi->data_off = m->data_off;
>  	mi->data_len = m->data_len;
> -	mi->port = m->port;
> -	mi->vlan_tci = m->vlan_tci;
> -	mi->vlan_tci_outer = m->vlan_tci_outer;
> -	mi->tx_offload = m->tx_offload;
> -	mi->hash = m->hash;
> +	mi->buf_iova = m->buf_iova;
> +	mi->buf_addr = m->buf_addr;
> +	mi->buf_len = m->buf_len;
>  
>  	mi->next = NULL;
>  	mi->pkt_len = mi->data_len;
>  	mi->nb_segs = 1;
> -	mi->packet_type = m->packet_type;
> -	mi->timestamp = m->timestamp;
>  
>  	__rte_mbuf_sanity_check(mi, 1);
>  	__rte_mbuf_sanity_check(m, 0);
> @@ -1927,6 +1934,32 @@ static inline void rte_pktmbuf_free(struct rte_mbuf *m)
>  struct rte_mbuf *
>  rte_pktmbuf_clone(struct rte_mbuf *md, struct rte_mempool *mp);
>  
> +/**
> + * Creates a full copy of a given packet mbuf.

Although it's not consistent everywhere, I think the imperative
form is preferred ("Create" instead of "Creates").

> + * Copies all the data from a given packet mbuf to a newly allocated
> + * set of mbufs.

Same here

> + *
> + * @param m
> + *   The packet mbuf to be cloned.

cloned -> copied (there are some other occurrences of "clone" below)

> + * @param mp
> + *   The mempool from which the "clone" mbufs are allocated.
> + * @param offset
> + *   The number of bytes to skip before copying.
> + *   If the mbuf does not have that many bytes, it is an error
> + *   and NULL is returned.
> + * @param length
> + *   The upper limit on bytes to copy.  Passing UINT32_MAX
> + *   means all data (after offset).
> + * @return
> + *   - The pointer to the new "clone" mbuf on success.
> + *   - NULL if allocation fails.
> + */
> +__rte_experimental
> +struct rte_mbuf *
> +rte_pktmbuf_copy(const struct rte_mbuf *m, struct rte_mempool *mp,
> +		 uint32_t offset, uint32_t length);
> +
>  /**
>   * Adds given value to the refcnt of all packet mbuf segments.
>   *
> diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
> index ff5c18a5559b..a50dcb6db9ec 100644
> --- a/lib/librte_mbuf/rte_mbuf_version.map
> +++ b/lib/librte_mbuf/rte_mbuf_version.map
> @@ -57,4 +57,5 @@ EXPERIMENTAL {
>  	global:
>  
>  	rte_mbuf_check;
> +	rte_pktmbuf_copy;
>  } DPDK_18.08;
> -- 
> 2.20.1
>
  
Stephen Hemminger Oct. 8, 2019, 3:27 p.m. UTC | #2
On Tue, 8 Oct 2019 11:03:34 +0200
Olivier Matz <olivier.matz@6wind.com> wrote:

> >  
> > +/**
> > + * Creates a full copy of a given packet mbuf.  
> 
> Although it's not consistent everywhere, I think the imperative
> form is preferred ("Create" instead of "Creates").
> 
> > + * Copies all the data from a given packet mbuf to a newly allocated
> > + * set of mbufs.  
> 
> Same here
> 
> > + *
> > + * @param m
> > + *   The packet mbuf to be cloned.  
> 
> cloned -> copied (there are some other occurrences of "clone" below)


This comment was modified from the mbuf_clone description.
The two should use same grammar. Don't really care which way
but should be consistent between these two.

Will fix both to match your suggestion.
  

Patch

diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index 9a1a1b5f9468..d5c9488fa213 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -321,6 +321,75 @@  __rte_pktmbuf_linearize(struct rte_mbuf *mbuf)
 	return 0;
 }
 
+/* Create a deep copy of mbuf */
+struct rte_mbuf *
+rte_pktmbuf_copy(const struct rte_mbuf *m, struct rte_mempool *mp,
+		 uint32_t off, uint32_t len)
+{
+	const struct rte_mbuf *seg = m;
+	struct rte_mbuf *mc, *m_last, **prev;
+	uint16_t priv_size;
+
+	if (unlikely(off >= m->pkt_len))
+		return NULL;
+
+	mc = rte_pktmbuf_alloc(mp);
+	if (unlikely(mc == NULL))
+		return NULL;
+
+	if (len > m->pkt_len - off)
+		len = m->pkt_len - off;
+
+	__rte_pktmbuf_copy_hdr(mc, m);
+
+	/* copy private area (can be 0) */
+	priv_size = RTE_MIN(rte_pktmbuf_priv_size(mp),
+			    rte_pktmbuf_priv_size(m->pool));
+	rte_memcpy(mc + 1, m + 1, priv_size);
+
+	prev = &mc->next;
+	m_last = mc;
+	while (len > 0) {
+		uint32_t copy_len;
+
+		while (off >= seg->data_len) {
+			off -= seg->data_len;
+			seg = seg->next;
+		}
+
+		/* current buffer is full, chain a new one */
+		if (rte_pktmbuf_tailroom(m_last) == 0) {
+			m_last = rte_pktmbuf_alloc(mp);
+			if (unlikely(m_last == NULL)) {
+				rte_pktmbuf_free(mc);
+				return NULL;
+			}
+			++mc->nb_segs;
+			*prev = m_last;
+			prev = &m_last->next;
+		}
+
+		copy_len = RTE_MIN(seg->data_len - off, len);
+		if (copy_len > rte_pktmbuf_tailroom(m_last))
+			copy_len = rte_pktmbuf_tailroom(m_last);
+
+		/* append from seg to m_last */
+		rte_memcpy(rte_pktmbuf_mtod_offset(m_last, char *,
+						   m_last->data_len),
+			   rte_pktmbuf_mtod_offset(seg, char *,
+						   off),
+			   copy_len);
+
+		m_last->data_len += copy_len;
+		mc->pkt_len += copy_len;
+		off += copy_len;
+		len -= copy_len;
+	}
+
+	__rte_mbuf_sanity_check(mc, 1);
+	return mc;
+}
+
 /* dump a mbuf on console */
 void
 rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 6133f12172ae..803ddf28496e 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -1684,6 +1684,19 @@  rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
  */
 #define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
 
+/* internal */
+static inline void
+__rte_pktmbuf_copy_hdr(struct rte_mbuf *mi, const struct rte_mbuf *m)
+{
+	mi->port = m->port;
+	mi->vlan_tci = m->vlan_tci;
+	mi->vlan_tci_outer = m->vlan_tci_outer;
+	mi->tx_offload = m->tx_offload;
+	mi->hash = m->hash;
+	mi->packet_type = m->packet_type;
+	mi->timestamp = m->timestamp;
+}
+
 /**
  * Attach packet mbuf to another packet mbuf.
  *
@@ -1721,23 +1734,17 @@  static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
 		mi->ol_flags = m->ol_flags | IND_ATTACHED_MBUF;
 	}
 
-	mi->buf_iova = m->buf_iova;
-	mi->buf_addr = m->buf_addr;
-	mi->buf_len = m->buf_len;
+	__rte_pktmbuf_copy_hdr(mi, m);
 
 	mi->data_off = m->data_off;
 	mi->data_len = m->data_len;
-	mi->port = m->port;
-	mi->vlan_tci = m->vlan_tci;
-	mi->vlan_tci_outer = m->vlan_tci_outer;
-	mi->tx_offload = m->tx_offload;
-	mi->hash = m->hash;
+	mi->buf_iova = m->buf_iova;
+	mi->buf_addr = m->buf_addr;
+	mi->buf_len = m->buf_len;
 
 	mi->next = NULL;
 	mi->pkt_len = mi->data_len;
 	mi->nb_segs = 1;
-	mi->packet_type = m->packet_type;
-	mi->timestamp = m->timestamp;
 
 	__rte_mbuf_sanity_check(mi, 1);
 	__rte_mbuf_sanity_check(m, 0);
@@ -1927,6 +1934,32 @@  static inline void rte_pktmbuf_free(struct rte_mbuf *m)
 struct rte_mbuf *
 rte_pktmbuf_clone(struct rte_mbuf *md, struct rte_mempool *mp);
 
+/**
+ * Creates a full copy of a given packet mbuf.
+ *
+ * Copies all the data from a given packet mbuf to a newly allocated
+ * set of mbufs.
+ *
+ * @param m
+ *   The packet mbuf to be cloned.
+ * @param mp
+ *   The mempool from which the "clone" mbufs are allocated.
+ * @param offset
+ *   The number of bytes to skip before copying.
+ *   If the mbuf does not have that many bytes, it is an error
+ *   and NULL is returned.
+ * @param length
+ *   The upper limit on bytes to copy.  Passing UINT32_MAX
+ *   means all data (after offset).
+ * @return
+ *   - The pointer to the new "clone" mbuf on success.
+ *   - NULL if allocation fails.
+ */
+__rte_experimental
+struct rte_mbuf *
+rte_pktmbuf_copy(const struct rte_mbuf *m, struct rte_mempool *mp,
+		 uint32_t offset, uint32_t length);
+
 /**
  * Adds given value to the refcnt of all packet mbuf segments.
  *
diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
index ff5c18a5559b..a50dcb6db9ec 100644
--- a/lib/librte_mbuf/rte_mbuf_version.map
+++ b/lib/librte_mbuf/rte_mbuf_version.map
@@ -57,4 +57,5 @@  EXPERIMENTAL {
 	global:
 
 	rte_mbuf_check;
+	rte_pktmbuf_copy;
 } DPDK_18.08;