[v3,4/6] mbuf: add a pktmbuf copy routine
diff mbox series

Message ID 20190930192056.26828-5-stephen@networkplumber.org
State Superseded, archived
Headers show
Series
  • mbuf copy/cloning enhancements
Related show

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Stephen Hemminger Sept. 30, 2019, 7:20 p.m. UTC
This is a commonly used operation that surprisingly the
DPDK has not supported. The new rte_pktmbuf_copy does a
deep copy of packet. This is a complete copy including
meta-data.

It handles the case where the source mbuf comes from a pool
with larger data area than the destination pool. The routine
also has options for skipping data, or truncating at a fixed
length.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/librte_mbuf/rte_mbuf.c           | 74 ++++++++++++++++++++++++++++
 lib/librte_mbuf/rte_mbuf.h           | 26 ++++++++++
 lib/librte_mbuf/rte_mbuf_version.map |  1 +
 3 files changed, 101 insertions(+)

Comments

Andrew Rybchenko Oct. 1, 2019, 2:03 p.m. UTC | #1
On 9/30/19 10:20 PM, Stephen Hemminger wrote:
> This is a commonly used operation that surprisingly the
> DPDK has not supported. The new rte_pktmbuf_copy does a
> deep copy of packet. This is a complete copy including
> meta-data.
>
> It handles the case where the source mbuf comes from a pool
> with larger data area than the destination pool. The routine
> also has options for skipping data, or truncating at a fixed
> length.
>
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>   lib/librte_mbuf/rte_mbuf.c           | 74 ++++++++++++++++++++++++++++
>   lib/librte_mbuf/rte_mbuf.h           | 26 ++++++++++
>   lib/librte_mbuf/rte_mbuf_version.map |  1 +
>   3 files changed, 101 insertions(+)
>
> diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
> index 9a1a1b5f9468..901df0192d2e 100644
> --- a/lib/librte_mbuf/rte_mbuf.c
> +++ b/lib/librte_mbuf/rte_mbuf.c
> @@ -321,6 +321,80 @@ __rte_pktmbuf_linearize(struct rte_mbuf *mbuf)
>   	return 0;
>   }
>   
> +/* Create a deep copy of mbuf */
> +struct rte_mbuf *
> +rte_pktmbuf_copy(const struct rte_mbuf *m, struct rte_mempool *mp,
> +		 uint32_t off, uint32_t len)
> +{
> +	const struct rte_mbuf *seg = m;
> +	struct rte_mbuf *mc, *m_last, **prev;
> +
> +	if (unlikely(off >= m->pkt_len))
> +		return NULL;
> +
> +	mc = rte_pktmbuf_alloc(mp);
> +	if (unlikely(mc == NULL))
> +		return NULL;
> +
> +	if (len > m->pkt_len - off)
> +		len = m->pkt_len - off;
> +
> +	/* clone meta data from original */
> +	mc->port = m->port;
> +	mc->vlan_tci = m->vlan_tci;
> +	mc->vlan_tci_outer = m->vlan_tci_outer;
> +	mc->tx_offload = m->tx_offload;
> +	mc->hash = m->hash;
> +	mc->packet_type = m->packet_type;
> +	mc->timestamp = m->timestamp;

The same is done in rte_pktmbuf_attach(). May be we need a helper
function to copy meta data? Just to avoid duplication in many places.

> +
> +	/* copy private data (if any) */
> +	rte_memcpy(mc + 1, m + 1,
> +		   rte_pktmbuf_priv_size(mp));

priv_size is mempool specific and original mbuf mempool
may have smaller priv_size. I'm not sure that it is safe to copy
outsize of priv_size at least from security point of view.
So, I think it should be RTE_MIN here.

> +
> +	prev = &mc->next;
> +	m_last = mc;
> +	while (len > 0) {
> +		uint32_t copy_len;
> +
> +		while (off >= seg->data_len) {
> +			off -= seg->data_len;
> +			seg = seg->next;
> +		}
> +
> +		/* current buffer is full, chain a new one */
> +		if (rte_pktmbuf_tailroom(m_last) == 0) {
> +			m_last = rte_pktmbuf_alloc(mp);
> +			if (unlikely(m_last == NULL)) {
> +				rte_pktmbuf_free(mc);
> +				return NULL;
> +			}
> +			++mc->nb_segs;
> +			*prev = m_last;
> +			prev = &m_last->next;
> +		}
> +
> +		copy_len = RTE_MIN(seg->data_len - off, len);
> +		if (copy_len > rte_pktmbuf_tailroom(m_last))
> +			copy_len = rte_pktmbuf_tailroom(m_last);
> +
> +		/* append from seg to m_last */
> +		rte_memcpy(rte_pktmbuf_mtod_offset(m_last, char *,
> +						   m_last->data_len),
> +			   rte_pktmbuf_mtod_offset(seg, char *,
> +						   off),
> +			   copy_len);
> +
> +		m_last->data_len += copy_len;
> +		mc->pkt_len += copy_len;
> +		off += copy_len;
> +		len -= copy_len;
> +	}
> +
> +	__rte_mbuf_sanity_check(mc, 1);
> +	return mc;
> +}
> +
>   /* dump a mbuf on console */
>   void
>   rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)

[snip]
Slava Ovsiienko Oct. 1, 2019, 5:36 p.m. UTC | #2
Hi, Stephen

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Stephen Hemminger
> Sent: Monday, September 30, 2019 22:21
> To: dev@dpdk.org
> Cc: Stephen Hemminger <stephen@networkplumber.org>
> Subject: [dpdk-dev] [PATCH v3 4/6] mbuf: add a pktmbuf copy routine
> 
> This is a commonly used operation that surprisingly the DPDK has not
> supported. The new rte_pktmbuf_copy does a deep copy of packet. This is a
> complete copy including meta-data.

There is mbuf dynamic fields coming [1], these ones also should be copied.
Is this patch planned to be updated?

[1] http://patches.dpdk.org/patch/59343/

[snip]

WBR, Slava
Stephen Hemminger Oct. 1, 2019, 11:29 p.m. UTC | #3
On Tue, 1 Oct 2019 17:36:10 +0000
Slava Ovsiienko <viacheslavo@mellanox.com> wrote:

> Hi, Stephen
> 
> > -----Original Message-----
> > From: dev <dev-bounces@dpdk.org> On Behalf Of Stephen Hemminger
> > Sent: Monday, September 30, 2019 22:21
> > To: dev@dpdk.org
> > Cc: Stephen Hemminger <stephen@networkplumber.org>
> > Subject: [dpdk-dev] [PATCH v3 4/6] mbuf: add a pktmbuf copy routine
> > 
> > This is a commonly used operation that surprisingly the DPDK has not
> > supported. The new rte_pktmbuf_copy does a deep copy of packet. This is a
> > complete copy including meta-data.  
> 
> There is mbuf dynamic fields coming [1], these ones also should be copied.
> Is this patch planned to be updated?
> 
> [1] http://patches.dpdk.org/patch/59343/
> 
> [snip]
> 
> WBR, Slava
> 

If this is merged, then it is responsibility of the dynamic mbuf
patch submitter to update. If dynamic mbuf makes it in, then this
patch can be updated.

Patch
diff mbox series

diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index 9a1a1b5f9468..901df0192d2e 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -321,6 +321,80 @@  __rte_pktmbuf_linearize(struct rte_mbuf *mbuf)
 	return 0;
 }
 
+/* Create a deep copy of mbuf */
+struct rte_mbuf *
+rte_pktmbuf_copy(const struct rte_mbuf *m, struct rte_mempool *mp,
+		 uint32_t off, uint32_t len)
+{
+	const struct rte_mbuf *seg = m;
+	struct rte_mbuf *mc, *m_last, **prev;
+
+	if (unlikely(off >= m->pkt_len))
+		return NULL;
+
+	mc = rte_pktmbuf_alloc(mp);
+	if (unlikely(mc == NULL))
+		return NULL;
+
+	if (len > m->pkt_len - off)
+		len = m->pkt_len - off;
+
+	/* clone meta data from original */
+	mc->port = m->port;
+	mc->vlan_tci = m->vlan_tci;
+	mc->vlan_tci_outer = m->vlan_tci_outer;
+	mc->tx_offload = m->tx_offload;
+	mc->hash = m->hash;
+	mc->packet_type = m->packet_type;
+	mc->timestamp = m->timestamp;
+
+	/* copy private data (if any) */
+	rte_memcpy(mc + 1, m + 1,
+		   rte_pktmbuf_priv_size(mp));
+
+	prev = &mc->next;
+	m_last = mc;
+	while (len > 0) {
+		uint32_t copy_len;
+
+		while (off >= seg->data_len) {
+			off -= seg->data_len;
+			seg = seg->next;
+		}
+
+		/* current buffer is full, chain a new one */
+		if (rte_pktmbuf_tailroom(m_last) == 0) {
+			m_last = rte_pktmbuf_alloc(mp);
+			if (unlikely(m_last == NULL)) {
+				rte_pktmbuf_free(mc);
+				return NULL;
+			}
+			++mc->nb_segs;
+			*prev = m_last;
+			prev = &m_last->next;
+		}
+
+		copy_len = RTE_MIN(seg->data_len - off, len);
+		if (copy_len > rte_pktmbuf_tailroom(m_last))
+			copy_len = rte_pktmbuf_tailroom(m_last);
+
+		/* append from seg to m_last */
+		rte_memcpy(rte_pktmbuf_mtod_offset(m_last, char *,
+						   m_last->data_len),
+			   rte_pktmbuf_mtod_offset(seg, char *,
+						   off),
+			   copy_len);
+
+		m_last->data_len += copy_len;
+		mc->pkt_len += copy_len;
+		off += copy_len;
+		len -= copy_len;
+	}
+
+	__rte_mbuf_sanity_check(mc, 1);
+	return mc;
+}
+
 /* dump a mbuf on console */
 void
 rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 6133f12172ae..b860d570ef20 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -1927,6 +1927,32 @@  static inline void rte_pktmbuf_free(struct rte_mbuf *m)
 struct rte_mbuf *
 rte_pktmbuf_clone(struct rte_mbuf *md, struct rte_mempool *mp);
 
+/**
+ * Creates a full copy of a given packet mbuf.
+ *
+ * Copies all the data from a given packet mbuf to a newly allocated
+ * set of mbufs.
+ *
+ * @param m
+ *   The packet mbuf to be cloned.
+ * @param mp
+ *   The mempool from which the "clone" mbufs are allocated.
+ * @param offset
+ *   The number of bytes to skip before copying.
+ *   If the mbuf does not have that many bytes, it is an error
+ *   and NULL is returned.
+ * @param length
+ *   The upper limit on bytes to copy.  Passing UINT32_MAX
+ *   means all data (after offset).
+ * @return
+ *   - The pointer to the new "clone" mbuf on success.
+ *   - NULL if allocation fails.
+ */
+__rte_experimental
+struct rte_mbuf *
+rte_pktmbuf_copy(const struct rte_mbuf *m, struct rte_mempool *mp,
+		 uint32_t offset, uint32_t length);
+
 /**
  * Adds given value to the refcnt of all packet mbuf segments.
  *
diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
index ff5c18a5559b..a50dcb6db9ec 100644
--- a/lib/librte_mbuf/rte_mbuf_version.map
+++ b/lib/librte_mbuf/rte_mbuf_version.map
@@ -57,4 +57,5 @@  EXPERIMENTAL {
 	global:
 
 	rte_mbuf_check;
+	rte_pktmbuf_copy;
 } DPDK_18.08;