[v2] kni: fix possible kernel crash with va2pa
Checks
Commit Message
va2pa depends on the physical address and virtual address offset of
current mbuf. It may get the wrong physical address of next mbuf which
allocated in another hugepage segment.
In rte_mempool_populate_default(), trying to allocate whole block of
contiguous memory could be failed. Then, it would reserve memory in
several memzones that have different physical address and virtual address
offsets. The rte_mempool_populate_default() is used by
rte_pktmbuf_pool_create().
Signed-off-by: Yangchao Zhou <zhouyates@gmail.com>
---
v2: Add an explanation that causes this problem.
Use m->next to store physical address.
---
kernel/linux/kni/kni_net.c | 42 +++++++++++--------
.../eal/include/exec-env/rte_kni_common.h | 2 +-
lib/librte_kni/rte_kni.c | 15 ++++++-
3 files changed, 40 insertions(+), 19 deletions(-)
Comments
On 3/12/2019 9:22 AM, Yangchao Zhou wrote:
> va2pa depends on the physical address and virtual address offset of
> current mbuf. It may get the wrong physical address of next mbuf which
> allocated in another hugepage segment.
>
> In rte_mempool_populate_default(), trying to allocate whole block of
> contiguous memory could be failed. Then, it would reserve memory in
> several memzones that have different physical address and virtual address
> offsets. The rte_mempool_populate_default() is used by
> rte_pktmbuf_pool_create().
>
> Signed-off-by: Yangchao Zhou <zhouyates@gmail.com>
> ---
> v2: Add an explanation that causes this problem.
> Use m->next to store physical address.
Overall code looks good to me, thanks for the work, it looks pretty clean.
I would like to do some test before giving an ack.
Meanwhile, can you please update kni documentation, to document for the mbufs
sent to kernel has mbuf->next fields as physical address. It is OK to send as a
new version of the patch with documentation.
Thanks,
ferruh
On 3/12/2019 9:22 AM, Yangchao Zhou wrote:
> va2pa depends on the physical address and virtual address offset of
> current mbuf. It may get the wrong physical address of next mbuf which
> allocated in another hugepage segment.
>
> In rte_mempool_populate_default(), trying to allocate whole block of
> contiguous memory could be failed. Then, it would reserve memory in
> several memzones that have different physical address and virtual address
> offsets. The rte_mempool_populate_default() is used by
> rte_pktmbuf_pool_create().
>
> Signed-off-by: Yangchao Zhou <zhouyates@gmail.com>
> ---
> v2: Add an explanation that causes this problem.
> Use m->next to store physical address.
<...>
> @@ -481,7 +486,7 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
> uint32_t ret;
> uint32_t len;
> uint32_t i, num_rq, num_fq, num;
> - struct rte_kni_mbuf *kva;
> + struct rte_kni_mbuf *kva, *_kva;
> void *data_kva;
> struct sk_buff *skb;
> struct net_device *dev = kni->net_dev;
> @@ -545,8 +550,11 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
> if (!kva->next)
> break;
>
> - kva = pa2kva(va2pa(kva->next, kva));
> + _kva = kva;
> + kva = pa2kva(kva->next);
> data_kva = kva2data_kva(kva);
> + /* Convert physical address to virtual address */
> + _kva->next = pa2va(_kva->next, kva);
> }
> }
>
Also need to update 'kni_net_rx_lo_fifo()', at worst to update 'next' fields
because it fills 'kni->free_q', without conversion userspace will crash.
<...>
> @@ -550,7 +563,7 @@ rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
> unsigned int i;
>
> for (i = 0; i < num; i++)
> - phy_mbufs[i] = va2pa(mbufs[i]);
> + phy_mbufs[i] = va2pa_all(mbufs[i]);
>
> ret = kni_fifo_put(kni->rx_q, phy_mbufs, num);
There is a problem here.
When fifo 'kni->rx_q' is full, 'rte_kni_tx_burst' will send less mbuf than
requested, than the application needs to handle the remaining packages, most
probably will free them, but now some packages has physical address in their
'next' field, which will cause app to crash.
I don't know really how to solve this.
Perhaps getting free count from 'kni->rx_q' and only convert that much
(va2pa_all) to physical address can work, but I can't estimate performance
effect of it.
Hi Yigit,
I was facing the kernel crash issue, at skb_over_put(), using dpdk using 18.11 on 4.19.28 kernel. On checking I did find this patch and this patch is solving the issue also. But then I saw you comment in the patch and that’s looks scary to have the patch. Is there any improvements/fixes planned for this issue and in which version? is there any performance impact of the below patch ? As this issues is blocking our release any inputs to this asap will be really appreciated.
--
Regards,
Souvik
From: dev <dev-bounces@dpdk.org> On Behalf Of Ferruh Yigit
Sent: Friday, March 22, 2019 4:49 PM
To: Yangchao Zhou <zhouyates@gmail.com>; dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH v2] kni: fix possible kernel crash with va2pa
On Tue, 12 Mar 2019 17:22:32 +0800
Yangchao Zhou <zhouyates@gmail.com> wrote:
> va2pa depends on the physical address and virtual address offset of
> current mbuf. It may get the wrong physical address of next mbuf which
> allocated in another hugepage segment.
>
> In rte_mempool_populate_default(), trying to allocate whole block of
> contiguous memory could be failed. Then, it would reserve memory in
> several memzones that have different physical address and virtual address
> offsets. The rte_mempool_populate_default() is used by
> rte_pktmbuf_pool_create().
>
> Signed-off-by: Yangchao Zhou <zhouyates@gmail.com>
Could you add a Fixes tag?
@@ -61,18 +61,6 @@ kva2data_kva(struct rte_kni_mbuf *m)
return phys_to_virt(m->buf_physaddr + m->data_off);
}
-/* virtual address to physical address */
-static void *
-va2pa(void *va, struct rte_kni_mbuf *m)
-{
- void *pa;
-
- pa = (void *)((unsigned long)va -
- ((unsigned long)m->buf_addr -
- (unsigned long)m->buf_physaddr));
- return pa;
-}
-
/*
* It can be called to process the request.
*/
@@ -173,7 +161,10 @@ kni_fifo_trans_pa2va(struct kni_dev *kni,
struct rte_kni_fifo *src_pa, struct rte_kni_fifo *dst_va)
{
uint32_t ret, i, num_dst, num_rx;
- void *kva;
+ struct rte_kni_mbuf *kva, *_kva;
+ int nb_segs;
+ int kva_nb_segs;
+
do {
num_dst = kni_fifo_free_count(dst_va);
if (num_dst == 0)
@@ -188,6 +179,17 @@ kni_fifo_trans_pa2va(struct kni_dev *kni,
for (i = 0; i < num_rx; i++) {
kva = pa2kva(kni->pa[i]);
kni->va[i] = pa2va(kni->pa[i], kva);
+
+ kva_nb_segs = kva->nb_segs;
+ for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
+ if (!kva->next)
+ break;
+
+ _kva = kva;
+ kva = pa2kva(kva->next);
+ /* Convert physical address to virtual address */
+ _kva->next = pa2va(_kva->next, kva);
+ }
}
ret = kni_fifo_put(dst_va, kni->va, num_rx);
@@ -313,7 +315,7 @@ kni_net_rx_normal(struct kni_dev *kni)
uint32_t ret;
uint32_t len;
uint32_t i, num_rx, num_fq;
- struct rte_kni_mbuf *kva;
+ struct rte_kni_mbuf *kva, *_kva;
void *data_kva;
struct sk_buff *skb;
struct net_device *dev = kni->net_dev;
@@ -363,8 +365,11 @@ kni_net_rx_normal(struct kni_dev *kni)
if (!kva->next)
break;
- kva = pa2kva(va2pa(kva->next, kva));
+ _kva = kva;
+ kva = pa2kva(kva->next);
data_kva = kva2data_kva(kva);
+ /* Convert physical address to virtual address */
+ _kva->next = pa2va(_kva->next, kva);
}
}
@@ -481,7 +486,7 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
uint32_t ret;
uint32_t len;
uint32_t i, num_rq, num_fq, num;
- struct rte_kni_mbuf *kva;
+ struct rte_kni_mbuf *kva, *_kva;
void *data_kva;
struct sk_buff *skb;
struct net_device *dev = kni->net_dev;
@@ -545,8 +550,11 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
if (!kva->next)
break;
- kva = pa2kva(va2pa(kva->next, kva));
+ _kva = kva;
+ kva = pa2kva(kva->next);
data_kva = kva2data_kva(kva);
+ /* Convert physical address to virtual address */
+ _kva->next = pa2va(_kva->next, kva);
}
}
@@ -86,7 +86,7 @@ struct rte_kni_mbuf {
/* fields on second cache line */
char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
void *pool;
- void *next;
+ void *next; /**< Physical address of next mbuf in kernel. */
};
/*
@@ -353,6 +353,19 @@ va2pa(struct rte_mbuf *m)
(unsigned long)m->buf_iova));
}
+static void *
+va2pa_all(struct rte_mbuf *mbuf)
+{
+ void *phy_mbuf = va2pa(mbuf);
+ struct rte_mbuf *next = mbuf->next;
+ while (next) {
+ mbuf->next = va2pa(next);
+ mbuf = next;
+ next = mbuf->next;
+ }
+ return phy_mbuf;
+}
+
static void
obj_free(struct rte_mempool *mp __rte_unused, void *opaque, void *obj,
unsigned obj_idx __rte_unused)
@@ -550,7 +563,7 @@ rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
unsigned int i;
for (i = 0; i < num; i++)
- phy_mbufs[i] = va2pa(mbufs[i]);
+ phy_mbufs[i] = va2pa_all(mbufs[i]);
ret = kni_fifo_put(kni->rx_q, phy_mbufs, num);