[dpdk-dev,2/3] pmd: RX function need keep EXTERNAL_MBUF flag

Message ID 1414138209-24431-3-git-send-email-changchun.ouyang@intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Ouyang Changchun Oct. 24, 2014, 8:10 a.m. UTC
  Every PMD RX function needs to keep the EXTERNAL_MBUF flag
in mbuf.ol_flags and must not overwrite it when filling ol_flags from
the descriptor into the mbuf; otherwise, it will probably cause a crash when freeing an mbuf
and trying to free its attached external buffer, say, from guest space.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_e1000/igb_rxtx.c       |  5 +++--
 lib/librte_pmd_i40e/i40e_rxtx.c       |  8 +++++---
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c     |  8 +++++---
 lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c | 12 ++++++++----
 4 files changed, 21 insertions(+), 12 deletions(-)
  

Comments

Ananyev, Konstantin Oct. 24, 2014, 10:46 a.m. UTC | #1
Hi Changchun,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ouyang Changchun
> Sent: Friday, October 24, 2014 9:10 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH 2/3] pmd: RX function need keep EXTERNAL_MBUF flag
> 
> Every pmd RX function need keep the EXTERNAL_MBUF flag
> in mbuf.ol_flags, and can't overwrite it when filling ol_flags from
> descriptor to mbuf, otherwise, it probably cause to crash when freeing a mbuf
> and trying to freeing its attached external buffer, say, from guest space.
> 

Don't really like the idea to put:
mb->ol_flags = pkt_flags | (mb->ol_flags & EXTERNAL_MBUF); 
in each and every PMD from now on...

On the other hand, it is probably not very good that RX functions update the whole of ol_flags, not only the RX-related part.
I wonder whether we can reserve the low 32 bits of ol_flags for RX, and the high 32 bits for TX and generic stuff.
Then our ol_flags would look something like this:

union {
	uint64_t ol_raw_flags;
	struct {
		uint32_t rx;
		uint32_t gen_tx;
	} ol_flags
};

And make all PMD RX functions operate on the rx part of the flags only:
mb->ol_flags.rx = pkt_flags;
?

Konstantin

> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
>  lib/librte_pmd_e1000/igb_rxtx.c       |  5 +++--
>  lib/librte_pmd_i40e/i40e_rxtx.c       |  8 +++++---
>  lib/librte_pmd_ixgbe/ixgbe_rxtx.c     |  8 +++++---
>  lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c | 12 ++++++++----
>  4 files changed, 21 insertions(+), 12 deletions(-)
> 
> diff --git a/lib/librte_pmd_e1000/igb_rxtx.c b/lib/librte_pmd_e1000/igb_rxtx.c
> index f09c525..4123310 100644
> --- a/lib/librte_pmd_e1000/igb_rxtx.c
> +++ b/lib/librte_pmd_e1000/igb_rxtx.c
> @@ -786,7 +786,7 @@ eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>  		pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
>  		pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
>  		pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
> -		rxm->ol_flags = pkt_flags;
> +		rxm->ol_flags = pkt_flags | (rxm->ol_flags & EXTERNAL_MBUF);
> 
>  		/*
>  		 * Store the mbuf address into the next entry of the array
> @@ -1020,7 +1020,8 @@ eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>  		pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
>  		pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
>  		pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
> -		first_seg->ol_flags = pkt_flags;
> +		first_seg->ol_flags = pkt_flags |
> +			(first_seg->ol_flags & EXTERNAL_MBUF);
> 
>  		/* Prefetch data of first segment, if configured to do so. */
>  		rte_packet_prefetch((char *)first_seg->buf_addr +
> diff --git a/lib/librte_pmd_i40e/i40e_rxtx.c b/lib/librte_pmd_i40e/i40e_rxtx.c
> index 2b53677..68c3695 100644
> --- a/lib/librte_pmd_i40e/i40e_rxtx.c
> +++ b/lib/librte_pmd_i40e/i40e_rxtx.c
> @@ -637,7 +637,8 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
>  			pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
>  			pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
>  			pkt_flags |= i40e_rxd_ptype_to_pkt_flags(qword1);
> -			mb->ol_flags = pkt_flags;
> +			mb->ol_flags = pkt_flags |
> +				(mb->ol_flags & EXTERNAL_MBUF);
>  			if (pkt_flags & PKT_RX_RSS_HASH)
>  				mb->hash.rss = rte_le_to_cpu_32(\
>  					rxdp->wb.qword0.hi_dword.rss);
> @@ -873,7 +874,7 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
>  		pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
>  		pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
>  		pkt_flags |= i40e_rxd_ptype_to_pkt_flags(qword1);
> -		rxm->ol_flags = pkt_flags;
> +		rxm->ol_flags = pkt_flags | (rxm->ol_flags & EXTERNAL_MBUF);
>  		if (pkt_flags & PKT_RX_RSS_HASH)
>  			rxm->hash.rss =
>  				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
> @@ -1027,7 +1028,8 @@ i40e_recv_scattered_pkts(void *rx_queue,
>  		pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
>  		pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
>  		pkt_flags |= i40e_rxd_ptype_to_pkt_flags(qword1);
> -		first_seg->ol_flags = pkt_flags;
> +		first_seg->ol_flags = pkt_flags |
> +			(first_seg->ol_flags & EXTERNAL_MBUF);
>  		if (pkt_flags & PKT_RX_RSS_HASH)
>  			rxm->hash.rss =
>  				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> index 1aefe5c..77e8689 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> @@ -949,7 +949,8 @@ ixgbe_rx_scan_hw_ring(struct igb_rx_queue *rxq)
>  			/* reuse status field from scan list */
>  			pkt_flags |= rx_desc_status_to_pkt_flags(s[j]);
>  			pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
> -			mb->ol_flags = pkt_flags;
> +			mb->ol_flags = pkt_flags |
> +				(mb->ol_flags & EXTERNAL_MBUF);
> 
>  			if (likely(pkt_flags & PKT_RX_RSS_HASH))
>  				mb->hash.rss = rxdp[j].wb.lower.hi_dword.rss;
> @@ -1271,7 +1272,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>  		pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
>  		pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
>  		pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
> -		rxm->ol_flags = pkt_flags;
> +		rxm->ol_flags = pkt_flags | (rxm->ol_flags & EXTERNAL_MBUF);
> 
>  		if (likely(pkt_flags & PKT_RX_RSS_HASH))
>  			rxm->hash.rss = rxd.wb.lower.hi_dword.rss;
> @@ -1515,7 +1516,8 @@ ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>  				rx_desc_status_to_pkt_flags(staterr));
>  		pkt_flags = (uint16_t)(pkt_flags |
>  				rx_desc_error_to_pkt_flags(staterr));
> -		first_seg->ol_flags = pkt_flags;
> +		first_seg->ol_flags = pkt_flags |
> +				(first_seg->ol_flags & EXTERNAL_MBUF);
> 
>  		if (likely(pkt_flags & PKT_RX_RSS_HASH))
>  			first_seg->hash.rss = rxd.wb.lower.hi_dword.rss;
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
> index e813e43..af7b1cd 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
> @@ -156,10 +156,14 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
>  	ptype1 = _mm_or_si128(ptype1, vtag1);
>  	vol.dword = _mm_cvtsi128_si64(ptype1) & OLFLAGS_MASK_V;
> 
> -	rx_pkts[0]->ol_flags = vol.e[0];
> -	rx_pkts[1]->ol_flags = vol.e[1];
> -	rx_pkts[2]->ol_flags = vol.e[2];
> -	rx_pkts[3]->ol_flags = vol.e[3];
> +	rx_pkts[0]->ol_flags = vol.e[0] |
> +		(rx_pkts[0]->ol_flags & EXTERNAL_MBUF);
> +	rx_pkts[1]->ol_flags = vol.e[1] |
> +		(rx_pkts[1]->ol_flags & EXTERNAL_MBUF);
> +	rx_pkts[2]->ol_flags = vol.e[2] |
> +		(rx_pkts[2]->ol_flags & EXTERNAL_MBUF);
> +	rx_pkts[3]->ol_flags = vol.e[3] |
> +		(rx_pkts[3]->ol_flags & EXTERNAL_MBUF);
>  }
>  #else
>  #define desc_to_olflags_v(desc, rx_pkts) do {} while (0)
> --
> 1.8.4.2
  
Bruce Richardson Oct. 24, 2014, 12:34 p.m. UTC | #2
On Fri, Oct 24, 2014 at 10:46:06AM +0000, Ananyev, Konstantin wrote:
> Hi Changchun,
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ouyang Changchun
> > Sent: Friday, October 24, 2014 9:10 AM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH 2/3] pmd: RX function need keep EXTERNAL_MBUF flag
> > 
> > Every pmd RX function need keep the EXTERNAL_MBUF flag
> > in mbuf.ol_flags, and can't overwrite it when filling ol_flags from
> > descriptor to mbuf, otherwise, it probably cause to crash when freeing a mbuf
> > and trying to freeing its attached external buffer, say, from guest space.
> > 
> 
> Don't really like the idea to put:
> mb->ol_flags = pkt_flags | (mb->ol_flags & EXTERNAL_MBUF); 
> in each and every PMD from now on...
> 
> From other side, it is probably not very good that RX functions update whole ol_flags, not only RX related part.
> Wonder can we reserve low 32bits of ol_flags for RX, and high 32bits for TX and generic stuff.
> So our ol_flags will look something like that:
> 
> union {
> 	uint64_t ol_raw_flags;
> 	struct {
> 		uint32_t rx;
> 		uint32_t gen_tx;
> 	} ol_flags
> };
> 
> And make all PMD RX functions to operate on rx part of the flags only:
> mb->ol_flags.rx = pkt_flags;
> ?
> 
> Konstantin
>
I would tend to agree with this. Changchun, did you get to assess the 
performance impact of making this change to the PMDs? I suspect that making 
the changes to each PMD would impact performance, while Konstantin's 
suggestion should eliminate that impact.
The downside there is that we are limiting the flexibility we have in 
expanding beyond 32 RX flags and 24 TX flags. :-(

/Bruce
  
Bruce Richardson Oct. 24, 2014, 3:43 p.m. UTC | #3
On Fri, Oct 24, 2014 at 01:34:58PM +0100, Bruce Richardson wrote:
> On Fri, Oct 24, 2014 at 10:46:06AM +0000, Ananyev, Konstantin wrote:
> > Hi Changchun,
> > 
> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ouyang Changchun
> > > Sent: Friday, October 24, 2014 9:10 AM
> > > To: dev@dpdk.org
> > > Subject: [dpdk-dev] [PATCH 2/3] pmd: RX function need keep EXTERNAL_MBUF flag
> > > 
> > > Every pmd RX function need keep the EXTERNAL_MBUF flag
> > > in mbuf.ol_flags, and can't overwrite it when filling ol_flags from
> > > descriptor to mbuf, otherwise, it probably cause to crash when freeing a mbuf
> > > and trying to freeing its attached external buffer, say, from guest space.
> > > 
> > 
> > Don't really like the idea to put:
> > mb->ol_flags = pkt_flags | (mb->ol_flags & EXTERNAL_MBUF); 
> > in each and every PMD from now on...
> > 
> > From other side, it is probably not very good that RX functions update whole ol_flags, not only RX related part.
> > Wonder can we reserve low 32bits of ol_flags for RX, and high 32bits for TX and generic stuff.
> > So our ol_flags will look something like that:
> > 
> > union {
> > 	uint64_t ol_raw_flags;
> > 	struct {
> > 		uint32_t rx;
> > 		uint32_t gen_tx;
> > 	} ol_flags
> > };
> > 
> > And make all PMD RX functions to operate on rx part of the flags only:
> > mb->ol_flags.rx = pkt_flags;
> > ?
> > 
> > Konstantin
> >
> I would tend to agree with this. Changchun, did you get to assess the 
> performance impact of making this change to the PMDs? I suspect that making 
> the changes to each PMD would impact performance, while Konstantin's 
> suggestion should eliminate that impact.
> The downside there is that we are limiting the flexibility we have in 
> expanding beyond 32 RX flags and 24 TX flags. :-(
> 
> /Bruce
> 

How about switching things around in terms of the flag? Instead of having to
manage a flag across the board to indicate if an mbuf is pointing to
external memory, I think we should use the flag to indicate that an mbuf is
attached to the memory space of another mbuf.

My reasons for suggesting this are:
1. Mbufs pointing to externally managed memory are not really the problem to 
be dealt with on free, since they can be handled the same as mbufs with the 
data pointer pointing internally, it's mbufs attached to other mbufs which 
are - so that's what we need to track using a flag.
2. Setting the flag to indicate an indirect mbuf should have no impact on 
the driver, as an mbuf that has just been allocated from mempool cannot be 
an indirect one.
3. The only place we would need to worry about such a flag is in the attach, 
detach and free mbuf functions - and on free we would simply need to replace 
the existing check for "md != m" with a new check for the new flag. It would 
be a contained change.

Thoughts?
/Bruce
  
Ananyev, Konstantin Oct. 24, 2014, 3:58 p.m. UTC | #4
> -----Original Message-----
> From: Richardson, Bruce
> Sent: Friday, October 24, 2014 4:43 PM
> To: Ananyev, Konstantin
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 2/3] pmd: RX function need keep EXTERNAL_MBUF flag
> 
> On Fri, Oct 24, 2014 at 01:34:58PM +0100, Bruce Richardson wrote:
> > On Fri, Oct 24, 2014 at 10:46:06AM +0000, Ananyev, Konstantin wrote:
> > > Hi Changchun,
> > >
> > > > -----Original Message-----
> > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ouyang Changchun
> > > > Sent: Friday, October 24, 2014 9:10 AM
> > > > To: dev@dpdk.org
> > > > Subject: [dpdk-dev] [PATCH 2/3] pmd: RX function need keep EXTERNAL_MBUF flag
> > > >
> > > > Every pmd RX function need keep the EXTERNAL_MBUF flag
> > > > in mbuf.ol_flags, and can't overwrite it when filling ol_flags from
> > > > descriptor to mbuf, otherwise, it probably cause to crash when freeing a mbuf
> > > > and trying to freeing its attached external buffer, say, from guest space.
> > > >
> > >
> > > Don't really like the idea to put:
> > > mb->ol_flags = pkt_flags | (mb->ol_flags & EXTERNAL_MBUF);
> > > in each and every PMD from now on...
> > >
> > > From other side, it is probably not very good that RX functions update whole ol_flags, not only RX related part.
> > > Wonder can we reserve low 32bits of ol_flags for RX, and high 32bits for TX and generic stuff.
> > > So our ol_flags will look something like that:
> > >
> > > union {
> > > 	uint64_t ol_raw_flags;
> > > 	struct {
> > > 		uint32_t rx;
> > > 		uint32_t gen_tx;
> > > 	} ol_flags
> > > };
> > >
> > > And make all PMD RX functions to operate on rx part of the flags only:
> > > mb->ol_flags.rx = pkt_flags;
> > > ?
> > >
> > > Konstantin
> > >
> > I would tend to agree with this. Changchun, did you get to assess the
> > performance impact of making this change to the PMDs? I suspect that making
> > the changes to each PMD would impact performance, while Konstantin's
> > suggestion should eliminate that impact.
> > The downside there is that we are limiting the flexibility we have in
> > expanding beyond 32 RX flags and 24 TX flags. :-(
> >
> > /Bruce
> >
> 
> How about switching things about in terms of the flag. Instead of having to
> manage a flag across the baord to indicate if an mbuf is pointing to
> external memory, I think we should use the flag to indicate that an mbuf is
> attached to the memory space of another mbuf.
> 
> My reasons for suggesting this are:
> 1. Mbufs pointing to externally managed memory are not really the problem to
> be dealt with on free, since they can be handled the same as mbufs with the
> data pointer pointing internally, it's mbufs attached to other mbufs which
> are - so that's what we need to track using a flag.
> 2. Setting the flag to indicate an indirect mbuf should have no impact on
> the driver, as an mbuf that has just been allocated from mempool cannot be
> an indirect one.
> 3. The only place we would need to worry about such a flag is in the attach,
> detach and free mbuf functions - and on free we would simply need to replace
> the existing check for "md != m" with a new check for the new flag. It would
> be a contained change.
> 

Sounds good to me.
That's definitely much better than my proposal.
Plus, if we stop relying on:

  md = RTE_MBUF_FROM_BADDR(m->buf_addr);
  if (unlikely (md != m)) {

That will allow us to set buf_addr to some other valid offset inside the mbuf,
and that would fix an old problem with mbuf extra metadata (userdata) stored in the packet's headroom.

Konstantin

> Thoughts?
> /Bruce
  
Ouyang Changchun Oct. 25, 2014, 2:08 a.m. UTC | #5
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ananyev,
> Konstantin
> Sent: Friday, October 24, 2014 11:58 PM
> To: Richardson, Bruce
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 2/3] pmd: RX function need keep
> EXTERNAL_MBUF flag
> 
> 
> 
> > -----Original Message-----
> > From: Richardson, Bruce
> > Sent: Friday, October 24, 2014 4:43 PM
> > To: Ananyev, Konstantin
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH 2/3] pmd: RX function need keep
> > EXTERNAL_MBUF flag
> >
> > On Fri, Oct 24, 2014 at 01:34:58PM +0100, Bruce Richardson wrote:
> > > On Fri, Oct 24, 2014 at 10:46:06AM +0000, Ananyev, Konstantin wrote:
> > > > Hi Changchun,
> > > >
> > > > > -----Original Message-----
> > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ouyang
> > > > > Changchun
> > > > > Sent: Friday, October 24, 2014 9:10 AM
> > > > > To: dev@dpdk.org
> > > > > Subject: [dpdk-dev] [PATCH 2/3] pmd: RX function need keep
> > > > > EXTERNAL_MBUF flag
> > > > >
> > > > > Every pmd RX function need keep the EXTERNAL_MBUF flag in
> > > > > mbuf.ol_flags, and can't overwrite it when filling ol_flags from
> > > > > descriptor to mbuf, otherwise, it probably cause to crash when
> > > > > freeing a mbuf and trying to freeing its attached external buffer, say,
> from guest space.
> > > > >
> > > >
> > > > Don't really like the idea to put:
> > > > mb->ol_flags = pkt_flags | (mb->ol_flags & EXTERNAL_MBUF);
> > > > in each and every PMD from now on...
> > > >
> > > > From other side, it is probably not very good that RX functions update
> whole ol_flags, not only RX related part.
> > > > Wonder can we reserve low 32bits of ol_flags for RX, and high 32bits for
> TX and generic stuff.
> > > > So our ol_flags will look something like that:
> > > >
> > > > union {
> > > > 	uint64_t ol_raw_flags;
> > > > 	struct {
> > > > 		uint32_t rx;
> > > > 		uint32_t gen_tx;
> > > > 	} ol_flags
> > > > };
> > > >
> > > > And make all PMD RX functions to operate on rx part of the flags only:
> > > > mb->ol_flags.rx = pkt_flags;
> > > > ?
> > > >
> > > > Konstantin
> > > >
> > > I would tend to agree with this. Changchun, did you get to assess
> > > the performance impact of making this change to the PMDs? I suspect
> > > that making the changes to each PMD would impact performance, while
> > > Konstantin's suggestion should eliminate that impact.
> > > The downside there is that we are limiting the flexibility we have
> > > in expanding beyond 32 RX flags and 24 TX flags. :-(
> > >
> > > /Bruce
> > >
> >
> > How about switching things about in terms of the flag. Instead of
> > having to manage a flag across the baord to indicate if an mbuf is
> > pointing to external memory, I think we should use the flag to
> > indicate that an mbuf is attached to the memory space of another mbuf.
> >
> > My reasons for suggesting this are:
> > 1. Mbufs pointing to externally managed memory are not really the
> > problem to be dealt with on free, since they can be handled the same
> > as mbufs with the data pointer pointing internally, it's mbufs
> > attached to other mbufs which are - so that's what we need to track using a
> flag.
> > 2. Setting the flag to indicate an indirect mbuf should have no impact
> > on the driver, as an mbuf that has just been allocated from mempool
> > cannot be an indirect one.
> > 3. The only place we would need to worry about such a flag is in the
> > attach, detach and free mbuf functions - and on free we would simply
> > need to replace the existing check for "md != m" with a new check for
> > the new flag. It would be a contained change.
> >
> 
> Sounds good to me.
> That's' definitely much better than my proposal.
> Plus, if we'll stop to rely on:
> 
>   md = RTE_MBUF_FROM_BADDR(m->buf_addr);
>   if (unlikely (md != m)) {
> 

This currently seems good to me, too, but it needs to be tried out more in practice.

> That will allow us to set  buf_addr to some other valid offset inside mbuf and
> that fix an old problem with mbufs extra metadata (userdata) stored in the
> packet's headroom.
> 
I don't fully understand this. Konstantin, could you explain more?
Thanks
Changchun
  

Patch

diff --git a/lib/librte_pmd_e1000/igb_rxtx.c b/lib/librte_pmd_e1000/igb_rxtx.c
index f09c525..4123310 100644
--- a/lib/librte_pmd_e1000/igb_rxtx.c
+++ b/lib/librte_pmd_e1000/igb_rxtx.c
@@ -786,7 +786,7 @@  eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
 		pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
 		pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
-		rxm->ol_flags = pkt_flags;
+		rxm->ol_flags = pkt_flags | (rxm->ol_flags & EXTERNAL_MBUF);
 
 		/*
 		 * Store the mbuf address into the next entry of the array
@@ -1020,7 +1020,8 @@  eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
 		pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
 		pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
-		first_seg->ol_flags = pkt_flags;
+		first_seg->ol_flags = pkt_flags |
+			(first_seg->ol_flags & EXTERNAL_MBUF);
 
 		/* Prefetch data of first segment, if configured to do so. */
 		rte_packet_prefetch((char *)first_seg->buf_addr +
diff --git a/lib/librte_pmd_i40e/i40e_rxtx.c b/lib/librte_pmd_i40e/i40e_rxtx.c
index 2b53677..68c3695 100644
--- a/lib/librte_pmd_i40e/i40e_rxtx.c
+++ b/lib/librte_pmd_i40e/i40e_rxtx.c
@@ -637,7 +637,8 @@  i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
 			pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
 			pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
 			pkt_flags |= i40e_rxd_ptype_to_pkt_flags(qword1);
-			mb->ol_flags = pkt_flags;
+			mb->ol_flags = pkt_flags |
+				(mb->ol_flags & EXTERNAL_MBUF);
 			if (pkt_flags & PKT_RX_RSS_HASH)
 				mb->hash.rss = rte_le_to_cpu_32(\
 					rxdp->wb.qword0.hi_dword.rss);
@@ -873,7 +874,7 @@  i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
 		pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
 		pkt_flags |= i40e_rxd_ptype_to_pkt_flags(qword1);
-		rxm->ol_flags = pkt_flags;
+		rxm->ol_flags = pkt_flags | (rxm->ol_flags & EXTERNAL_MBUF);
 		if (pkt_flags & PKT_RX_RSS_HASH)
 			rxm->hash.rss =
 				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
@@ -1027,7 +1028,8 @@  i40e_recv_scattered_pkts(void *rx_queue,
 		pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
 		pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
 		pkt_flags |= i40e_rxd_ptype_to_pkt_flags(qword1);
-		first_seg->ol_flags = pkt_flags;
+		first_seg->ol_flags = pkt_flags |
+			(first_seg->ol_flags & EXTERNAL_MBUF);
 		if (pkt_flags & PKT_RX_RSS_HASH)
 			rxm->hash.rss =
 				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 1aefe5c..77e8689 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -949,7 +949,8 @@  ixgbe_rx_scan_hw_ring(struct igb_rx_queue *rxq)
 			/* reuse status field from scan list */
 			pkt_flags |= rx_desc_status_to_pkt_flags(s[j]);
 			pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
-			mb->ol_flags = pkt_flags;
+			mb->ol_flags = pkt_flags |
+				(mb->ol_flags & EXTERNAL_MBUF);
 
 			if (likely(pkt_flags & PKT_RX_RSS_HASH))
 				mb->hash.rss = rxdp[j].wb.lower.hi_dword.rss;
@@ -1271,7 +1272,7 @@  ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
 		pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
 		pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
-		rxm->ol_flags = pkt_flags;
+		rxm->ol_flags = pkt_flags | (rxm->ol_flags & EXTERNAL_MBUF);
 
 		if (likely(pkt_flags & PKT_RX_RSS_HASH))
 			rxm->hash.rss = rxd.wb.lower.hi_dword.rss;
@@ -1515,7 +1516,8 @@  ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 				rx_desc_status_to_pkt_flags(staterr));
 		pkt_flags = (uint16_t)(pkt_flags |
 				rx_desc_error_to_pkt_flags(staterr));
-		first_seg->ol_flags = pkt_flags;
+		first_seg->ol_flags = pkt_flags |
+				(first_seg->ol_flags & EXTERNAL_MBUF);
 
 		if (likely(pkt_flags & PKT_RX_RSS_HASH))
 			first_seg->hash.rss = rxd.wb.lower.hi_dword.rss;
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
index e813e43..af7b1cd 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
@@ -156,10 +156,14 @@  desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
 	ptype1 = _mm_or_si128(ptype1, vtag1);
 	vol.dword = _mm_cvtsi128_si64(ptype1) & OLFLAGS_MASK_V;
 
-	rx_pkts[0]->ol_flags = vol.e[0];
-	rx_pkts[1]->ol_flags = vol.e[1];
-	rx_pkts[2]->ol_flags = vol.e[2];
-	rx_pkts[3]->ol_flags = vol.e[3];
+	rx_pkts[0]->ol_flags = vol.e[0] |
+		(rx_pkts[0]->ol_flags & EXTERNAL_MBUF);
+	rx_pkts[1]->ol_flags = vol.e[1] |
+		(rx_pkts[1]->ol_flags & EXTERNAL_MBUF);
+	rx_pkts[2]->ol_flags = vol.e[2] |
+		(rx_pkts[2]->ol_flags & EXTERNAL_MBUF);
+	rx_pkts[3]->ol_flags = vol.e[3] |
+		(rx_pkts[3]->ol_flags & EXTERNAL_MBUF);
 }
 #else
 #define desc_to_olflags_v(desc, rx_pkts) do {} while (0)