[RFC,v2] kni: properly translate pa2va for cloned mbuf

Message ID 202001281642.00SGgPfT032152@lectura.cs.arizona.edu (mailing list archive)
State RFC, archived
Delegated to: Ferruh Yigit
Headers
Series [RFC,v2] kni: properly translate pa2va for cloned mbuf |

Checks

Context Check Description
ci/Intel-compilation fail Compilation issues

Commit Message

Junxiao Shi Jan. 28, 2020, 4:16 p.m. UTC
  Previously, KNI kernel module uses the difference between m->buf_addr
and m->buf_iova to calculate userspace virtual address from physical
address. This works for direct mbufs, but does not work for indirect
(cloned) mbufs that come from another mempool. Transmitting a cloned
mbuf may cause segmentation fault in userspace.

Now, userspace KNI library writes the virtual address of each mbuf
in m->userdata field, and KNI kernel module uses this field to restore
virtual address before putting mbuf into free_q. This approach works
for both direct and indirect mbufs.

NOTE TO REVIEWER - DO NOT MERGE
The idea of this change is at https://bugs.dpdk.org/show_bug.cgi?id=183#c4
Test case is at https://bugs.dpdk.org/show_bug.cgi?id=183#c5
I only modified kni_net_rx_normal function.
If this approach is acceptable, I will modify kni_net_rx_lo_fifo,
kni_net_rx_lo_fifo_skb, and kni_fifo_trans_pa2va(rx_q) as well.

Bugzilla ID: 183

Signed-off-by: Junxiao Shi <git@mail1.yoursunny.com>
---
 kernel/linux/kni/kni_net.c                        | 4 ++--
 lib/librte_eal/linux/eal/include/rte_kni_common.h | 3 ++-
 lib/librte_kni/rte_kni.c                          | 8 ++++++--
 3 files changed, 10 insertions(+), 5 deletions(-)
  

Comments

Ferruh Yigit Feb. 4, 2020, 4:17 p.m. UTC | #1
On 1/28/2020 4:16 PM, Junxiao Shi wrote:
> Previously, KNI kernel module uses the difference between m->buf_addr
> and m->buf_iova to calculate userspace virtual address from physical
> address. This works for direct mbufs, but does not work for indirect
> (cloned) mbufs that come from another mempool. Transmitting a cloned
> mbuf may cause segmentation fault in userspace.

Yes, indirect buffers from another mempool won't work, I think first thing to do
is detect these mbufs and return error to prevent the crash.

> 
> Now, userspace KNI library writes the virtual address of each mbuf
> in m->userdata field, and KNI kernel module uses this field to restore
> virtual address before putting mbuf into free_q. This approach works
> for both direct and indirect mbufs.

Overwriting the mbuf metadata is not ideal and in some cases it may be
unacceptable so I think we can't make it default behavior.


But why not have this as option, need to find a way to pass this option to kni
library.

And kni library may add a flag to mbuf to say the 'userdata' is used to as 'va'
before sending packet to kernel, this can make kernel code unified which can use
'pa2va()' when there is no flag and 'kva->va' when there is.
Adding this flag to mbuf.h prevents the any possible conflict in the future if
@Oliver accepts this.

And in mbuf, not sure if there is flag for mbuf that marks the 'userdata' have
valid value or not. But detecting it in userspace and not overwriting the value
but returning error in that case can be better option.

What do you think?

> 
> NOTE TO REVIEWER - DO NOT MERGE
> The idea of this change is at https://bugs.dpdk.org/show_bug.cgi?id=183#c4
> Test case is at https://bugs.dpdk.org/show_bug.cgi?id=183#c5
> I only modified kni_net_rx_normal function.
> If this approach is acceptable, I will modify kni_net_rx_lo_fifo,
> kni_net_rx_lo_fifo_skb, and kni_fifo_trans_pa2va(rx_q) as well.
> 
> Bugzilla ID: 183
> 
> Signed-off-by: Junxiao Shi <git@mail1.yoursunny.com>
> ---
>  kernel/linux/kni/kni_net.c                        | 4 ++--
>  lib/librte_eal/linux/eal/include/rte_kni_common.h | 3 ++-
>  lib/librte_kni/rte_kni.c                          | 8 ++++++--
>  3 files changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
> index 97fe85b..d783545 100644
> --- a/kernel/linux/kni/kni_net.c
> +++ b/kernel/linux/kni/kni_net.c
> @@ -377,7 +377,7 @@ kni_net_rx_normal(struct kni_dev *kni)
>  		kva = get_kva(kni, kni->pa[i]);
>  		len = kva->pkt_len;
>  		data_kva = get_data_kva(kni, kva);
> -		kni->va[i] = pa2va(kni->pa[i], kva);
> +		kni->va[i] = kva->va;
>  
>  		skb = netdev_alloc_skb(dev, len);
>  		if (!skb) {
> @@ -403,7 +403,7 @@ kni_net_rx_normal(struct kni_dev *kni)
>  				kva = pa2kva(kva->next);
>  				data_kva = kva2data_kva(kva);
>  				/* Convert physical address to virtual address */
> -				prev_kva->next = pa2va(prev_kva->next, kva);
> +				prev_kva->next = kva->va;
>  			}
>  		}
>  
> diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h b/lib/librte_eal/linux/eal/include/rte_kni_common.h
> index 7313ef5..c694a1d 100644
> --- a/lib/librte_eal/linux/eal/include/rte_kni_common.h
> +++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h
> @@ -86,7 +86,8 @@ struct rte_kni_mbuf {
>  	uint16_t data_len;      /**< Amount of data in segment buffer. */
>  
>  	/* fields on second cache line */
> -	char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
> +	void *va __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
> +	         /**< Virtual address of this mbuf in userspace (overwrites userdata). */
>  	void *pool;
>  	void *next;             /**< Physical address of next mbuf in kernel. */
>  };
> diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
> index e388751..463485f 100644
> --- a/lib/librte_kni/rte_kni.c
> +++ b/lib/librte_kni/rte_kni.c
> @@ -359,13 +359,15 @@ va2pa(struct rte_mbuf *m)
>  static void *
>  va2pa_all(struct rte_mbuf *mbuf)
>  {
> -	void *phy_mbuf = va2pa(mbuf);
> +	void *phy_mbuf = (void*)rte_mempool_virt2iova(mbuf);
>  	struct rte_mbuf *next = mbuf->next;
>  	while (next) {
> -		mbuf->next = va2pa(next);
> +		mbuf->userdata = mbuf;
> +		mbuf->next = (void*)rte_mempool_virt2iova(next);
>  		mbuf = next;
>  		next = mbuf->next;
>  	}
> +	mbuf->userdata = mbuf;
>  	return phy_mbuf;
>  }
>  
> @@ -652,6 +654,8 @@ kni_allocate_mbufs(struct rte_kni *kni)
>  			 offsetof(struct rte_kni_mbuf, buf_addr));
>  	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, next) !=
>  			 offsetof(struct rte_kni_mbuf, next));
> +	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, userdata) !=
> +			 offsetof(struct rte_kni_mbuf, va));
>  	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
>  			 offsetof(struct rte_kni_mbuf, data_off));
>  	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
>
  
yoursunny Feb. 4, 2020, 4:49 p.m. UTC | #2
Hi Ferruh

Thanks for your review.

On Tue, Feb 4, 2020 at 11:17 AM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>
> On 1/28/2020 4:16 PM, Junxiao Shi wrote:
> > Previously, KNI kernel module uses the difference between m->buf_addr
> > and m->buf_iova to calculate userspace virtual address from physical
> > address. This works for direct mbufs, but does not work for indirect
> > (cloned) mbufs that come from another mempool. Transmitting a cloned
> > mbuf may cause segmentation fault in userspace.
>
> Yes, indirect buffers from another mempool won't work, I think first thing to do
> is detect these mbufs and return error to prevent the crash.
>
> >
> > Now, userspace KNI library writes the virtual address of each mbuf
> > in m->userdata field, and KNI kernel module uses this field to restore
> > virtual address before putting mbuf into free_q. This approach works
> > for both direct and indirect mbufs.
>
> Overwriting the mbuf metadata is not ideal and in some cases it may be
> unacceptable so I think we can't make it default behavior.
>

I believe that after an mbuf has been passed to ethdev for
transmission, it is owned by ethdev, and the user application should
not use it anymore.
This is because, the mbuf could be free'd by ethdev after
transmission, so that it is unsafe for the user application to
dereference its pointer.
Notice that in case the user application has created indirect mbufs
pointing to the same buffer, only the mbuf passed to ethdev would have
its m->userdata field overwritten; other mbufs still held by the
application are unaffected.
This is why I decided to overwrite the m->userdata field.

Can you explain, in what case overwriting mbuf metadata would be unacceptable?

>
> But why not have this as option, need to find a way to pass this option to kni
> library.
>
> And kni library may add a flag to mbuf to say the 'userdata' is used to as 'va'
> before sending packet to kernel, this can make kernel code unified which can use
> 'pa2va()' when there is no flag and 'kva->va' when there is.
> Adding this flag to mbuf.h prevents the any possible conflict in the future if
> @Oliver accepts this.
>
> And in mbuf, not sure if there is flag for mbuf that marks the 'userdata' have
> valid value or not. But detecting it in userspace and not overwriting the value
> but returning error in that case can be better option.
>
> What do you think?
>
> >
> > NOTE TO REVIEWER - DO NOT MERGE
> > The idea of this change is at https://bugs.dpdk.org/show_bug.cgi?id=183#c4
> > Test case is at https://bugs.dpdk.org/show_bug.cgi?id=183#c5
> > I only modified kni_net_rx_normal function.
> > If this approach is acceptable, I will modify kni_net_rx_lo_fifo,
> > kni_net_rx_lo_fifo_skb, and kni_fifo_trans_pa2va(rx_q) as well.
> >
> > Bugzilla ID: 183
> >
> > Signed-off-by: Junxiao Shi <git@mail1.yoursunny.com>
> > ---
> >  kernel/linux/kni/kni_net.c                        | 4 ++--
> >  lib/librte_eal/linux/eal/include/rte_kni_common.h | 3 ++-
> >  lib/librte_kni/rte_kni.c                          | 8 ++++++--
> >  3 files changed, 10 insertions(+), 5 deletions(-)
> >
> > diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
> > index 97fe85b..d783545 100644
> > --- a/kernel/linux/kni/kni_net.c
> > +++ b/kernel/linux/kni/kni_net.c
> > @@ -377,7 +377,7 @@ kni_net_rx_normal(struct kni_dev *kni)
> >               kva = get_kva(kni, kni->pa[i]);
> >               len = kva->pkt_len;
> >               data_kva = get_data_kva(kni, kva);
> > -             kni->va[i] = pa2va(kni->pa[i], kva);
> > +             kni->va[i] = kva->va;
> >
> >               skb = netdev_alloc_skb(dev, len);
> >               if (!skb) {
> > @@ -403,7 +403,7 @@ kni_net_rx_normal(struct kni_dev *kni)
> >                               kva = pa2kva(kva->next);
> >                               data_kva = kva2data_kva(kva);
> >                               /* Convert physical address to virtual address */
> > -                             prev_kva->next = pa2va(prev_kva->next, kva);
> > +                             prev_kva->next = kva->va;
> >                       }
> >               }
> >
> > diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h b/lib/librte_eal/linux/eal/include/rte_kni_common.h
> > index 7313ef5..c694a1d 100644
> > --- a/lib/librte_eal/linux/eal/include/rte_kni_common.h
> > +++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h
> > @@ -86,7 +86,8 @@ struct rte_kni_mbuf {
> >       uint16_t data_len;      /**< Amount of data in segment buffer. */
> >
> >       /* fields on second cache line */
> > -     char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
> > +     void *va __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
> > +              /**< Virtual address of this mbuf in userspace (overwrites userdata). */
> >       void *pool;
> >       void *next;             /**< Physical address of next mbuf in kernel. */
> >  };
> > diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
> > index e388751..463485f 100644
> > --- a/lib/librte_kni/rte_kni.c
> > +++ b/lib/librte_kni/rte_kni.c
> > @@ -359,13 +359,15 @@ va2pa(struct rte_mbuf *m)
> >  static void *
> >  va2pa_all(struct rte_mbuf *mbuf)
> >  {
> > -     void *phy_mbuf = va2pa(mbuf);
> > +     void *phy_mbuf = (void*)rte_mempool_virt2iova(mbuf);
> >       struct rte_mbuf *next = mbuf->next;
> >       while (next) {
> > -             mbuf->next = va2pa(next);
> > +             mbuf->userdata = mbuf;
> > +             mbuf->next = (void*)rte_mempool_virt2iova(next);
> >               mbuf = next;
> >               next = mbuf->next;
> >       }
> > +     mbuf->userdata = mbuf;
> >       return phy_mbuf;
> >  }
> >
> > @@ -652,6 +654,8 @@ kni_allocate_mbufs(struct rte_kni *kni)
> >                        offsetof(struct rte_kni_mbuf, buf_addr));
> >       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, next) !=
> >                        offsetof(struct rte_kni_mbuf, next));
> > +     RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, userdata) !=
> > +                      offsetof(struct rte_kni_mbuf, va));
> >       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
> >                        offsetof(struct rte_kni_mbuf, data_off));
> >       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
> >
>
  
Ferruh Yigit Feb. 4, 2020, 5:58 p.m. UTC | #3
On 2/4/2020 4:49 PM, yoursunny wrote:
> Hi Ferruh
> 
> Thanks for your review.
> 
> On Tue, Feb 4, 2020 at 11:17 AM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>>
>> On 1/28/2020 4:16 PM, Junxiao Shi wrote:
>>> Previously, KNI kernel module uses the difference between m->buf_addr
>>> and m->buf_iova to calculate userspace virtual address from physical
>>> address. This works for direct mbufs, but does not work for indirect
>>> (cloned) mbufs that come from another mempool. Transmitting a cloned
>>> mbuf may cause segmentation fault in userspace.
>>
>> Yes, indirect buffers from another mempool won't work, I think first thing to do
>> is detect these mbufs and return error to prevent the crash.
>>
>>>
>>> Now, userspace KNI library writes the virtual address of each mbuf
>>> in m->userdata field, and KNI kernel module uses this field to restore
>>> virtual address before putting mbuf into free_q. This approach works
>>> for both direct and indirect mbufs.
>>
>> Overwriting the mbuf metadata is not ideal and in some cases it may be
>> unacceptable so I think we can't make it default behavior.
>>
> 
> I believe that after an mbuf has been passed to ethdev for
> transmission, it is owned by ethdev, and the user application should
> not use it anymore.
> This is because, the mbuf could be free'd by ethdev after
> transmission, so that it is unsafe for the user application to
> dereference its pointer.
> Notice that in case the user application has created indirect mbufs
> pointing to the same buffer, only the mbuf passed to ethdev would have
> its m->userdata field overwritten; other mbufs still held by the
> application are unaffected.
> This is why I decided to overwrite the m->userdata field.
> 
> Can you explain, in what case overwriting mbuf metadata would be unacceptable?

If the mbuf sent to kernel space returns back to DPDK application and if next
hope was expecting that metadata field.

'kni_net_rx_lo_fifo()' can cause that but that is mainly for debug, not sure if
that counts really.

I am not quite sure if any packet sent to kernel can return back to circulation,
but trying to be more defensive here, and protect the mbuf metada as much as
possible and not overwrite it via KNI data. That is why suggesting to enable it
via an option instead of making it default.

And 'userdata' seems not really used much as of now, but what if it has been
changed to dynamic field, which was mentioned in the past, and that field end up
being something required by kernel side?
Long shot? Probably. In that case we can make that option default, but I think
better than risking breaking some possible usage.

> 
>>
>> But why not have this as option, need to find a way to pass this option to kni
>> library.
>>
>> And kni library may add a flag to mbuf to say the 'userdata' is used to as 'va'
>> before sending packet to kernel, this can make kernel code unified which can use
>> 'pa2va()' when there is no flag and 'kva->va' when there is.
>> Adding this flag to mbuf.h prevents the any possible conflict in the future if
>> @Oliver accepts this.
>>
>> And in mbuf, not sure if there is flag for mbuf that marks the 'userdata' have
>> valid value or not. But detecting it in userspace and not overwriting the value
>> but returning error in that case can be better option.
>>
>> What do you think?
>>
>>>
>>> NOTE TO REVIEWER - DO NOT MERGE
>>> The idea of this change is at https://bugs.dpdk.org/show_bug.cgi?id=183#c4
>>> Test case is at https://bugs.dpdk.org/show_bug.cgi?id=183#c5
>>> I only modified kni_net_rx_normal function.
>>> If this approach is acceptable, I will modify kni_net_rx_lo_fifo,
>>> kni_net_rx_lo_fifo_skb, and kni_fifo_trans_pa2va(rx_q) as well.
>>>
>>> Bugzilla ID: 183
>>>
>>> Signed-off-by: Junxiao Shi <git@mail1.yoursunny.com>
>>> ---
>>>  kernel/linux/kni/kni_net.c                        | 4 ++--
>>>  lib/librte_eal/linux/eal/include/rte_kni_common.h | 3 ++-
>>>  lib/librte_kni/rte_kni.c                          | 8 ++++++--
>>>  3 files changed, 10 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
>>> index 97fe85b..d783545 100644
>>> --- a/kernel/linux/kni/kni_net.c
>>> +++ b/kernel/linux/kni/kni_net.c
>>> @@ -377,7 +377,7 @@ kni_net_rx_normal(struct kni_dev *kni)
>>>               kva = get_kva(kni, kni->pa[i]);
>>>               len = kva->pkt_len;
>>>               data_kva = get_data_kva(kni, kva);
>>> -             kni->va[i] = pa2va(kni->pa[i], kva);
>>> +             kni->va[i] = kva->va;
>>>
>>>               skb = netdev_alloc_skb(dev, len);
>>>               if (!skb) {
>>> @@ -403,7 +403,7 @@ kni_net_rx_normal(struct kni_dev *kni)
>>>                               kva = pa2kva(kva->next);
>>>                               data_kva = kva2data_kva(kva);
>>>                               /* Convert physical address to virtual address */
>>> -                             prev_kva->next = pa2va(prev_kva->next, kva);
>>> +                             prev_kva->next = kva->va;
>>>                       }
>>>               }
>>>
>>> diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h b/lib/librte_eal/linux/eal/include/rte_kni_common.h
>>> index 7313ef5..c694a1d 100644
>>> --- a/lib/librte_eal/linux/eal/include/rte_kni_common.h
>>> +++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h
>>> @@ -86,7 +86,8 @@ struct rte_kni_mbuf {
>>>       uint16_t data_len;      /**< Amount of data in segment buffer. */
>>>
>>>       /* fields on second cache line */
>>> -     char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
>>> +     void *va __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
>>> +              /**< Virtual address of this mbuf in userspace (overwrites userdata). */
>>>       void *pool;
>>>       void *next;             /**< Physical address of next mbuf in kernel. */
>>>  };
>>> diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
>>> index e388751..463485f 100644
>>> --- a/lib/librte_kni/rte_kni.c
>>> +++ b/lib/librte_kni/rte_kni.c
>>> @@ -359,13 +359,15 @@ va2pa(struct rte_mbuf *m)
>>>  static void *
>>>  va2pa_all(struct rte_mbuf *mbuf)
>>>  {
>>> -     void *phy_mbuf = va2pa(mbuf);
>>> +     void *phy_mbuf = (void*)rte_mempool_virt2iova(mbuf);
>>>       struct rte_mbuf *next = mbuf->next;
>>>       while (next) {
>>> -             mbuf->next = va2pa(next);
>>> +             mbuf->userdata = mbuf;
>>> +             mbuf->next = (void*)rte_mempool_virt2iova(next);
>>>               mbuf = next;
>>>               next = mbuf->next;
>>>       }
>>> +     mbuf->userdata = mbuf;
>>>       return phy_mbuf;
>>>  }
>>>
>>> @@ -652,6 +654,8 @@ kni_allocate_mbufs(struct rte_kni *kni)
>>>                        offsetof(struct rte_kni_mbuf, buf_addr));
>>>       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, next) !=
>>>                        offsetof(struct rte_kni_mbuf, next));
>>> +     RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, userdata) !=
>>> +                      offsetof(struct rte_kni_mbuf, va));
>>>       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
>>>                        offsetof(struct rte_kni_mbuf, data_off));
>>>       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
>>>
>>
  

Patch

diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index 97fe85b..d783545 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -377,7 +377,7 @@  kni_net_rx_normal(struct kni_dev *kni)
 		kva = get_kva(kni, kni->pa[i]);
 		len = kva->pkt_len;
 		data_kva = get_data_kva(kni, kva);
-		kni->va[i] = pa2va(kni->pa[i], kva);
+		kni->va[i] = kva->va;
 
 		skb = netdev_alloc_skb(dev, len);
 		if (!skb) {
@@ -403,7 +403,7 @@  kni_net_rx_normal(struct kni_dev *kni)
 				kva = pa2kva(kva->next);
 				data_kva = kva2data_kva(kva);
 				/* Convert physical address to virtual address */
-				prev_kva->next = pa2va(prev_kva->next, kva);
+				prev_kva->next = kva->va;
 			}
 		}
 
diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h b/lib/librte_eal/linux/eal/include/rte_kni_common.h
index 7313ef5..c694a1d 100644
--- a/lib/librte_eal/linux/eal/include/rte_kni_common.h
+++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h
@@ -86,7 +86,8 @@  struct rte_kni_mbuf {
 	uint16_t data_len;      /**< Amount of data in segment buffer. */
 
 	/* fields on second cache line */
-	char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
+	void *va __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
+	         /**< Virtual address of this mbuf in userspace (overwrites userdata). */
 	void *pool;
 	void *next;             /**< Physical address of next mbuf in kernel. */
 };
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index e388751..463485f 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -359,13 +359,15 @@  va2pa(struct rte_mbuf *m)
 static void *
 va2pa_all(struct rte_mbuf *mbuf)
 {
-	void *phy_mbuf = va2pa(mbuf);
+	void *phy_mbuf = (void*)rte_mempool_virt2iova(mbuf);
 	struct rte_mbuf *next = mbuf->next;
 	while (next) {
-		mbuf->next = va2pa(next);
+		mbuf->userdata = mbuf;
+		mbuf->next = (void*)rte_mempool_virt2iova(next);
 		mbuf = next;
 		next = mbuf->next;
 	}
+	mbuf->userdata = mbuf;
 	return phy_mbuf;
 }
 
@@ -652,6 +654,8 @@  kni_allocate_mbufs(struct rte_kni *kni)
 			 offsetof(struct rte_kni_mbuf, buf_addr));
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, next) !=
 			 offsetof(struct rte_kni_mbuf, next));
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, userdata) !=
+			 offsetof(struct rte_kni_mbuf, va));
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
 			 offsetof(struct rte_kni_mbuf, data_off));
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=