net/bonding: fix buf corruption in merging un-transmitted packets

Message ID 1534551009-16550-1-git-send-email-jyu@vmware.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series net/bonding: fix buf corruption in merging un-transmitted packets |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Jia Yu Aug. 18, 2018, 12:10 a.m. UTC
  When bond slave devices cannot transmit all packets in bufs array,
tx_burst callback shall merge the un-transmitted packets back to
bufs array. Recent merge logic introduced a bug which causes
invalid mbuf addresses being written to bufs array.
When caller frees the un-transmitted packets, due to invalid addresses,
application will crash.

The fix is avoid shifting mbufs, and directly write un-transmitted
packets back to bufs array.

Signed-off-by: Jia Yu <jyu@vmware.com>
---
 drivers/net/bonding/rte_eth_bond_pmd.c | 98 +++++-----------------------------
 1 file changed, 14 insertions(+), 84 deletions(-)
  

Comments

Chas Williams Aug. 18, 2018, 11:49 p.m. UTC | #1
On Fri, Aug 17, 2018 at 9:46 PM Jia Yu <jyu@vmware.com> wrote:

> When bond slave devices cannot transmit all packets in bufs array,
> tx_burst callback shall merge the un-transmitted packets back to
> bufs array. Recent merge logic introduced a bug which causes
> invalid mbuf addresses being written to bufs array.
>

Can you expand on this a bit?  What was the commit?



> When caller frees the un-transmitted packets, due to invalid addresses,
> application will crash.
>
> The fix is avoid shifting mbufs, and directly write un-transmitted
> packets back to bufs array.
>
> Signed-off-by: Jia Yu <jyu@vmware.com>
> ---
>  drivers/net/bonding/rte_eth_bond_pmd.c | 98
> +++++-----------------------------
>  1 file changed, 14 insertions(+), 84 deletions(-)
>
> diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c
> b/drivers/net/bonding/rte_eth_bond_pmd.c
> index 58f7377..cce973a 100644
> --- a/drivers/net/bonding/rte_eth_bond_pmd.c
> +++ b/drivers/net/bonding/rte_eth_bond_pmd.c
> @@ -303,7 +303,7 @@ bond_ethdev_tx_burst_8023ad_fast_queue(void *queue,
> struct rte_mbuf **bufs,
>         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = {
> 0 };
>         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
>
> -       uint16_t i, j;
> +       uint16_t i;
>
>         if (unlikely(nb_bufs == 0))
>                 return 0;
> @@ -361,31 +361,9 @@ bond_ethdev_tx_burst_8023ad_fast_queue(void *queue,
> struct rte_mbuf **bufs,
>                         slave_tx_fail_count[i] = slave_nb_bufs[i] -
>                                         slave_tx_count;
>                         total_tx_fail_count += slave_tx_fail_count[i];
> -
> -                       /*
> -                        * Shift bufs to beginning of array to allow
> reordering
> -                        * later
> -                        */
> -                       for (j = 0; j < slave_tx_fail_count[i]; j++) {
> -                               slave_bufs[i][j] =
> -                                       slave_bufs[i][(slave_tx_count - 1)
> + j];
> -                       }
> -               }
> -       }
> -
> -       /*
> -        * If there are tx burst failures we move packets to end of bufs to
> -        * preserve expected PMD behaviour of all failed transmitted being
> -        * at the end of the input mbuf array
> -        */
> -       if (unlikely(total_tx_fail_count > 0)) {
> -               int bufs_idx = nb_bufs - total_tx_fail_count - 1;
> -
> -               for (i = 0; i < slave_count; i++) {
> -                       if (slave_tx_fail_count[i] > 0) {
> -                               for (j = 0; j < slave_tx_fail_count[i];
> j++)
> -                                       bufs[bufs_idx++] =
> slave_bufs[i][j];
> -                       }
> +                       memcpy(&bufs[nb_bufs - total_tx_fail_count],
> +                              &slave_bufs[i][slave_tx_count],
> +                              slave_tx_fail_count[i] * sizeof(bufs[0]));
>                 }
>         }
>
> @@ -715,8 +693,8 @@ bond_ethdev_tx_burst_round_robin(void *queue, struct
> rte_mbuf **bufs,
>                                 tx_fail_total += tx_fail_slave;
>
>                                 memcpy(&bufs[nb_pkts - tx_fail_total],
> -
>  &slave_bufs[i][num_tx_slave],
> -                                               tx_fail_slave *
> sizeof(bufs[0]));
> +                                      &slave_bufs[i][num_tx_slave],
> +                                      tx_fail_slave * sizeof(bufs[0]));
>                         }
>                         num_tx_total += num_tx_slave;
>                 }
> @@ -1224,7 +1202,7 @@ bond_ethdev_tx_burst_balance(void *queue, struct
> rte_mbuf **bufs,
>         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = {
> 0 };
>         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
>
> -       uint16_t i, j;
> +       uint16_t i;
>
>         if (unlikely(nb_bufs == 0))
>                 return 0;
> @@ -1268,31 +1246,9 @@ bond_ethdev_tx_burst_balance(void *queue, struct
> rte_mbuf **bufs,
>                         slave_tx_fail_count[i] = slave_nb_bufs[i] -
>                                         slave_tx_count;
>                         total_tx_fail_count += slave_tx_fail_count[i];
> -
> -                       /*
> -                        * Shift bufs to beginning of array to allow
> reordering
> -                        * later
> -                        */
> -                       for (j = 0; j < slave_tx_fail_count[i]; j++) {
> -                               slave_bufs[i][j] =
> -                                       slave_bufs[i][(slave_tx_count - 1)
> + j];
> -                       }
> -               }
> -       }
> -
> -       /*
> -        * If there are tx burst failures we move packets to end of bufs to
> -        * preserve expected PMD behaviour of all failed transmitted being
> -        * at the end of the input mbuf array
> -        */
> -       if (unlikely(total_tx_fail_count > 0)) {
> -               int bufs_idx = nb_bufs - total_tx_fail_count - 1;
> -
> -               for (i = 0; i < slave_count; i++) {
> -                       if (slave_tx_fail_count[i] > 0) {
> -                               for (j = 0; j < slave_tx_fail_count[i];
> j++)
> -                                       bufs[bufs_idx++] =
> slave_bufs[i][j];
> -                       }
> +                       memcpy(&bufs[nb_bufs - total_tx_fail_count],
> +                              &slave_bufs[i][slave_tx_count],
> +                              slave_tx_fail_count[i] * sizeof(bufs[0]));
>                 }
>         }
>
> @@ -1322,7 +1278,7 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct
> rte_mbuf **bufs,
>         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = {
> 0 };
>         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
>
> -       uint16_t i, j;
> +       uint16_t i;
>
>         if (unlikely(nb_bufs == 0))
>                 return 0;
> @@ -1384,35 +1340,9 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct
> rte_mbuf **bufs,
>                                                 slave_tx_count;
>                                 total_tx_fail_count +=
> slave_tx_fail_count[i];
>
> -                               /*
> -                                * Shift bufs to beginning of array to
> allow
> -                                * reordering later
> -                                */
> -                               for (j = 0; j < slave_tx_fail_count[i];
> j++)
> -                                       slave_bufs[i][j] =
> -                                               slave_bufs[i]
> -                                                       [(slave_tx_count -
> 1)
> -                                                       + j];
> -                       }
> -               }
> -
> -               /*
> -                * If there are tx burst failures we move packets to end of
> -                * bufs to preserve expected PMD behaviour of all failed
> -                * transmitted being at the end of the input mbuf array
> -                */
> -               if (unlikely(total_tx_fail_count > 0)) {
> -                       int bufs_idx = nb_bufs - total_tx_fail_count - 1;
> -
> -                       for (i = 0; i < slave_count; i++) {
> -                               if (slave_tx_fail_count[i] > 0) {
> -                                       for (j = 0;
> -                                               j < slave_tx_fail_count[i];
> -                                               j++) {
> -                                               bufs[bufs_idx++] =
> -                                                       slave_bufs[i][j];
> -                                       }
> -                               }
> +                               memcpy(&bufs[nb_bufs -
> total_tx_fail_count],
> +                                      &slave_bufs[i][slave_tx_count],
> +                                      slave_tx_fail_count[i] *
> sizeof(bufs[0]));
>                         }
>                 }
>         }
> --
> 2.7.4
>
>
  
Jia Yu Aug. 19, 2018, 10:19 p.m. UTC | #2
Hi Chas,

Thanks for reviewing the change.

Our application crashed after upgrading to DPDK 18.02, when packet rate is high and bond is configured. It happened because txq contains invalid mbuf addresses after rte_eth_tx_burst call (for example, 0x100000000 repeated 13 times in one core dump). It seems that the invalid addresses came from buf shift code below, so I changed this part of code to earlier version. We don’t see crash after the fix. Please let us know if the fix is reasonable.

m_table = {0x7fdf23a68ec0, 0x7fdf23a68400, 0x7fdf23a66e80, 0x7fdf23a65900,
    0x7fdf23960e00, 0x100000000 <repeats 13 times>, 0x7fdf23978640, 0x7fdf23977b80, 0x7fdf239745c0, 0x7fdf23a4d600, 0x7fdf23a4cb40,
    0x7fdf23a75b00, 0x7fdf23a74580, 0x7fdf23972580, 0x7fdf239a76c0, 0x7fdf239a5680, 0x7fdf23a7a640, 0x7fdf23a79b80, 0x7fdf23a77b40,
    0x7fdf2395b800}

/* Send packet burst on each slave device */
    for (i = 0; i < slave_count; i++) {
        if (slave_nb_bufs[i] == 0)
            continue;

        slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                bd_tx_q->queue_id, slave_bufs[i],
                slave_nb_bufs[i]);

        total_tx_count += slave_tx_count;

        /* If tx burst fails move packets to end of bufs */
        if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
            slave_tx_fail_count[i] = slave_nb_bufs[i] -
                    slave_tx_count;
            total_tx_fail_count += slave_tx_fail_count[i];

            /*
             * Shift bufs to beginning of array to allow reordering
             * later
             */
            for (j = 0; j < slave_tx_fail_count[i]; j++) {
                slave_bufs[i][j] =
                    slave_bufs[i][(slave_tx_count - 1) + j]; <==== what if slave_tx_count == 0, j == 0
            }
        }
    }

Thanks,
Jia

From: Chas Williams <3chas3@gmail.com>
Date: Saturday, August 18, 2018 at 4:50 PM
To: Jia Yu <jyu@vmware.com>
Cc: "dev@dpdk.org" <dev@dpdk.org>, Declan Doherty <declan.doherty@intel.com>, Chas Williams <chas3@att.com>
Subject: Re: [dpdk-dev] [PATCH] net/bonding: fix buf corruption in merging un-transmitted packets



On Fri, Aug 17, 2018 at 9:46 PM Jia Yu <jyu@vmware.com<mailto:jyu@vmware.com>> wrote:
When bond slave devices cannot transmit all packets in bufs array,
tx_burst callback shall merge the un-transmitted packets back to
bufs array. Recent merge logic introduced a bug which causes
invalid mbuf addresses being written to bufs array.

Can you expand on this a bit?  What was the commit?


When caller frees the un-transmitted packets, due to invalid addresses,
application will crash.

The fix is avoid shifting mbufs, and directly write un-transmitted
packets back to bufs array.

Signed-off-by: Jia Yu <jyu@vmware.com<mailto:jyu@vmware.com>>
---
 drivers/net/bonding/rte_eth_bond_pmd.c | 98 +++++-----------------------------
 1 file changed, 14 insertions(+), 84 deletions(-)

diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index 58f7377..cce973a 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -303,7 +303,7 @@ bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
        uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

-       uint16_t i, j;
+       uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;
@@ -361,31 +361,9 @@ bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                        slave_tx_fail_count[i] = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count[i];
-
-                       /*
-                        * Shift bufs to beginning of array to allow reordering
-                        * later
-                        */
-                       for (j = 0; j < slave_tx_fail_count[i]; j++) {
-                               slave_bufs[i][j] =
-                                       slave_bufs[i][(slave_tx_count - 1) + j];
-                       }
-               }
-       }
-
-       /*
-        * If there are tx burst failures we move packets to end of bufs to
-        * preserve expected PMD behaviour of all failed transmitted being
-        * at the end of the input mbuf array
-        */
-       if (unlikely(total_tx_fail_count > 0)) {
-               int bufs_idx = nb_bufs - total_tx_fail_count - 1;
-
-               for (i = 0; i < slave_count; i++) {
-                       if (slave_tx_fail_count[i] > 0) {
-                               for (j = 0; j < slave_tx_fail_count[i]; j++)
-                                       bufs[bufs_idx++] = slave_bufs[i][j];
-                       }
+                       memcpy(&bufs[nb_bufs - total_tx_fail_count],
+                              &slave_bufs[i][slave_tx_count],
+                              slave_tx_fail_count[i] * sizeof(bufs[0]));
                }
        }

@@ -715,8 +693,8 @@ bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
-                                               &slave_bufs[i][num_tx_slave],
-                                               tx_fail_slave * sizeof(bufs[0]));
+                                      &slave_bufs[i][num_tx_slave],
+                                      tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
@@ -1224,7 +1202,7 @@ bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
        uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

-       uint16_t i, j;
+       uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;
@@ -1268,31 +1246,9 @@ bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                        slave_tx_fail_count[i] = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count[i];
-
-                       /*
-                        * Shift bufs to beginning of array to allow reordering
-                        * later
-                        */
-                       for (j = 0; j < slave_tx_fail_count[i]; j++) {
-                               slave_bufs[i][j] =
-                                       slave_bufs[i][(slave_tx_count - 1) + j];
-                       }
-               }
-       }
-
-       /*
-        * If there are tx burst failures we move packets to end of bufs to
-        * preserve expected PMD behaviour of all failed transmitted being
-        * at the end of the input mbuf array
-        */
-       if (unlikely(total_tx_fail_count > 0)) {
-               int bufs_idx = nb_bufs - total_tx_fail_count - 1;
-
-               for (i = 0; i < slave_count; i++) {
-                       if (slave_tx_fail_count[i] > 0) {
-                               for (j = 0; j < slave_tx_fail_count[i]; j++)
-                                       bufs[bufs_idx++] = slave_bufs[i][j];
-                       }
+                       memcpy(&bufs[nb_bufs - total_tx_fail_count],
+                              &slave_bufs[i][slave_tx_count],
+                              slave_tx_fail_count[i] * sizeof(bufs[0]));
                }
        }

@@ -1322,7 +1278,7 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
        uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

-       uint16_t i, j;
+       uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;
@@ -1384,35 +1340,9 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                                                slave_tx_count;
                                total_tx_fail_count += slave_tx_fail_count[i];

-                               /*
-                                * Shift bufs to beginning of array to allow
-                                * reordering later
-                                */
-                               for (j = 0; j < slave_tx_fail_count[i]; j++)
-                                       slave_bufs[i][j] =
-                                               slave_bufs[i]
-                                                       [(slave_tx_count - 1)
-                                                       + j];
-                       }
-               }
-
-               /*
-                * If there are tx burst failures we move packets to end of
-                * bufs to preserve expected PMD behaviour of all failed
-                * transmitted being at the end of the input mbuf array
-                */
-               if (unlikely(total_tx_fail_count > 0)) {
-                       int bufs_idx = nb_bufs - total_tx_fail_count - 1;
-
-                       for (i = 0; i < slave_count; i++) {
-                               if (slave_tx_fail_count[i] > 0) {
-                                       for (j = 0;
-                                               j < slave_tx_fail_count[i];
-                                               j++) {
-                                               bufs[bufs_idx++] =
-                                                       slave_bufs[i][j];
-                                       }
-                               }
+                               memcpy(&bufs[nb_bufs - total_tx_fail_count],
+                                      &slave_bufs[i][slave_tx_count],
+                                      slave_tx_fail_count[i] * sizeof(bufs[0]));
                        }
                }
        }
--
2.7.4
  
Chas Williams Aug. 19, 2018, 11:20 p.m. UTC | #3
On Sun, Aug 19, 2018 at 6:19 PM Jia Yu <jyu@vmware.com> wrote:

> Hi Chas,
>
>
>
> Thanks for reviewing the change.
>
>
>
> Our application crashed after upgrading to DPDK 18.02, when packet rate is
> high and bond is configured. It happened because txq contains invalid mbuf
> addresses after rte_eth_tx_burst call (for example, 0x100000000 repeated 13
> times in one core dump). It seems that the invalid addresses came from buf
> shift code below, so I changed this part of code to earlier version. We
> don’t see crash after the fix. Please let us know if the fix is reasonable.
>

Sorry, I was unclear.  Can you reference the previous commit that broke what
you are fixed?  I would like to see a "Fixes" line.

See https://core.dpdk.org/contribute/ for guidance or just examine git log
for an example of what Fixes: should look like.

I am guessing you meant 09150784a776 ("net/bonding: burst mode hash
calculation")
in your commit message?


>
>
> m_table = {0x7fdf23a68ec0, 0x7fdf23a68400, 0x7fdf23a66e80, 0x7fdf23a65900,
>     0x7fdf23960e00, 0x100000000 <repeats 13 times>, 0x7fdf23978640,
> 0x7fdf23977b80, 0x7fdf239745c0, 0x7fdf23a4d600, 0x7fdf23a4cb40,
>     0x7fdf23a75b00, 0x7fdf23a74580, 0x7fdf23972580, 0x7fdf239a76c0,
> 0x7fdf239a5680, 0x7fdf23a7a640, 0x7fdf23a79b80, 0x7fdf23a77b40,
>     0x7fdf2395b800}
>
>
>
> /* Send packet burst on each slave device */
>     for (i = 0; i < slave_count; i++) {
>         if (slave_nb_bufs[i] == 0)
>             continue;
>
>
>         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
>                 bd_tx_q->queue_id, slave_bufs[i],
>                 slave_nb_bufs[i]);
>
>
>         total_tx_count += slave_tx_count;
>
>
>         /* If tx burst fails move packets to end of bufs */
>         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
>             slave_tx_fail_count[i] = slave_nb_bufs[i] -
>                     slave_tx_count;
>             total_tx_fail_count += slave_tx_fail_count[i];
>
>
>             /*
>              * Shift bufs to beginning of array to allow reordering
>              * later
>              */
>             for (j = 0; j < slave_tx_fail_count[i]; j++) {
>                 slave_bufs[i][j] =
>                     slave_bufs[i][(slave_tx_count - 1) + j]; <==== what if
> slave_tx_count == 0, j == 0
>             }
>         }
>     }
>
>
>
> Thanks,
>
> Jia
>
>
>
> *From: *Chas Williams <3chas3@gmail.com>
> *Date: *Saturday, August 18, 2018 at 4:50 PM
> *To: *Jia Yu <jyu@vmware.com>
> *Cc: *"dev@dpdk.org" <dev@dpdk.org>, Declan Doherty <
> declan.doherty@intel.com>, Chas Williams <chas3@att.com>
> *Subject: *Re: [dpdk-dev] [PATCH] net/bonding: fix buf corruption in
> merging un-transmitted packets
>
>
>
>
>
>
>
> On Fri, Aug 17, 2018 at 9:46 PM Jia Yu <jyu@vmware.com> wrote:
>
> When bond slave devices cannot transmit all packets in bufs array,
> tx_burst callback shall merge the un-transmitted packets back to
> bufs array. Recent merge logic introduced a bug which causes
> invalid mbuf addresses being written to bufs array.
>
>
>
> Can you expand on this a bit?  What was the commit?
>
>
>
>
>
> When caller frees the un-transmitted packets, due to invalid addresses,
> application will crash.
>
> The fix is avoid shifting mbufs, and directly write un-transmitted
> packets back to bufs array.
>
> Signed-off-by: Jia Yu <jyu@vmware.com>
> ---
>  drivers/net/bonding/rte_eth_bond_pmd.c | 98
> +++++-----------------------------
>  1 file changed, 14 insertions(+), 84 deletions(-)
>
> diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c
> b/drivers/net/bonding/rte_eth_bond_pmd.c
> index 58f7377..cce973a 100644
> --- a/drivers/net/bonding/rte_eth_bond_pmd.c
> +++ b/drivers/net/bonding/rte_eth_bond_pmd.c
> @@ -303,7 +303,7 @@ bond_ethdev_tx_burst_8023ad_fast_queue(void *queue,
> struct rte_mbuf **bufs,
>         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = {
> 0 };
>         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
>
> -       uint16_t i, j;
> +       uint16_t i;
>
>         if (unlikely(nb_bufs == 0))
>                 return 0;
> @@ -361,31 +361,9 @@ bond_ethdev_tx_burst_8023ad_fast_queue(void *queue,
> struct rte_mbuf **bufs,
>                         slave_tx_fail_count[i] = slave_nb_bufs[i] -
>                                         slave_tx_count;
>                         total_tx_fail_count += slave_tx_fail_count[i];
> -
> -                       /*
> -                        * Shift bufs to beginning of array to allow
> reordering
> -                        * later
> -                        */
> -                       for (j = 0; j < slave_tx_fail_count[i]; j++) {
> -                               slave_bufs[i][j] =
> -                                       slave_bufs[i][(slave_tx_count - 1)
> + j];
> -                       }
> -               }
> -       }
> -
> -       /*
> -        * If there are tx burst failures we move packets to end of bufs to
> -        * preserve expected PMD behaviour of all failed transmitted being
> -        * at the end of the input mbuf array
> -        */
> -       if (unlikely(total_tx_fail_count > 0)) {
> -               int bufs_idx = nb_bufs - total_tx_fail_count - 1;
> -
> -               for (i = 0; i < slave_count; i++) {
> -                       if (slave_tx_fail_count[i] > 0) {
> -                               for (j = 0; j < slave_tx_fail_count[i];
> j++)
> -                                       bufs[bufs_idx++] =
> slave_bufs[i][j];
> -                       }
> +                       memcpy(&bufs[nb_bufs - total_tx_fail_count],
> +                              &slave_bufs[i][slave_tx_count],
> +                              slave_tx_fail_count[i] * sizeof(bufs[0]));
>                 }
>         }
>
> @@ -715,8 +693,8 @@ bond_ethdev_tx_burst_round_robin(void *queue, struct
> rte_mbuf **bufs,
>                                 tx_fail_total += tx_fail_slave;
>
>                                 memcpy(&bufs[nb_pkts - tx_fail_total],
> -
>  &slave_bufs[i][num_tx_slave],
> -                                               tx_fail_slave *
> sizeof(bufs[0]));
> +                                      &slave_bufs[i][num_tx_slave],
> +                                      tx_fail_slave * sizeof(bufs[0]));
>                         }
>                         num_tx_total += num_tx_slave;
>                 }
> @@ -1224,7 +1202,7 @@ bond_ethdev_tx_burst_balance(void *queue, struct
> rte_mbuf **bufs,
>         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = {
> 0 };
>         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
>
> -       uint16_t i, j;
> +       uint16_t i;
>
>         if (unlikely(nb_bufs == 0))
>                 return 0;
> @@ -1268,31 +1246,9 @@ bond_ethdev_tx_burst_balance(void *queue, struct
> rte_mbuf **bufs,
>                         slave_tx_fail_count[i] = slave_nb_bufs[i] -
>                                         slave_tx_count;
>                         total_tx_fail_count += slave_tx_fail_count[i];
> -
> -                       /*
> -                        * Shift bufs to beginning of array to allow
> reordering
> -                        * later
> -                        */
> -                       for (j = 0; j < slave_tx_fail_count[i]; j++) {
> -                               slave_bufs[i][j] =
> -                                       slave_bufs[i][(slave_tx_count - 1)
> + j];
> -                       }
> -               }
> -       }
> -
> -       /*
> -        * If there are tx burst failures we move packets to end of bufs to
> -        * preserve expected PMD behaviour of all failed transmitted being
> -        * at the end of the input mbuf array
> -        */
> -       if (unlikely(total_tx_fail_count > 0)) {
> -               int bufs_idx = nb_bufs - total_tx_fail_count - 1;
> -
> -               for (i = 0; i < slave_count; i++) {
> -                       if (slave_tx_fail_count[i] > 0) {
> -                               for (j = 0; j < slave_tx_fail_count[i];
> j++)
> -                                       bufs[bufs_idx++] =
> slave_bufs[i][j];
> -                       }
> +                       memcpy(&bufs[nb_bufs - total_tx_fail_count],
> +                              &slave_bufs[i][slave_tx_count],
> +                              slave_tx_fail_count[i] * sizeof(bufs[0]));
>                 }
>         }
>
> @@ -1322,7 +1278,7 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct
> rte_mbuf **bufs,
>         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = {
> 0 };
>         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
>
> -       uint16_t i, j;
> +       uint16_t i;
>
>         if (unlikely(nb_bufs == 0))
>                 return 0;
> @@ -1384,35 +1340,9 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct
> rte_mbuf **bufs,
>                                                 slave_tx_count;
>                                 total_tx_fail_count +=
> slave_tx_fail_count[i];
>
> -                               /*
> -                                * Shift bufs to beginning of array to
> allow
> -                                * reordering later
> -                                */
> -                               for (j = 0; j < slave_tx_fail_count[i];
> j++)
> -                                       slave_bufs[i][j] =
> -                                               slave_bufs[i]
> -                                                       [(slave_tx_count -
> 1)
> -                                                       + j];
> -                       }
> -               }
> -
> -               /*
> -                * If there are tx burst failures we move packets to end of
> -                * bufs to preserve expected PMD behaviour of all failed
> -                * transmitted being at the end of the input mbuf array
> -                */
> -               if (unlikely(total_tx_fail_count > 0)) {
> -                       int bufs_idx = nb_bufs - total_tx_fail_count - 1;
> -
> -                       for (i = 0; i < slave_count; i++) {
> -                               if (slave_tx_fail_count[i] > 0) {
> -                                       for (j = 0;
> -                                               j < slave_tx_fail_count[i];
> -                                               j++) {
> -                                               bufs[bufs_idx++] =
> -                                                       slave_bufs[i][j];
> -                                       }
> -                               }
> +                               memcpy(&bufs[nb_bufs -
> total_tx_fail_count],
> +                                      &slave_bufs[i][slave_tx_count],
> +                                      slave_tx_fail_count[i] *
> sizeof(bufs[0]));
>                         }
>                 }
>         }
> --
> 2.7.4
>
>
  
Chas Williams Aug. 20, 2018, 12:07 a.m. UTC | #4
On Sun, Aug 19, 2018 at 6:19 PM Jia Yu <jyu@vmware.com> wrote:

> Hi Chas,
>
>
>
> Thanks for reviewing the change.
>
>
>
> Our application crashed after upgrading to DPDK 18.02, when packet rate is
> high and bond is configured. It happened because txq contains invalid mbuf
> addresses after rte_eth_tx_burst call (for example, 0x100000000 repeated 13
> times in one core dump). It seems that the invalid addresses came from buf
> shift code below, so I changed this part of code to earlier version. We
> don’t see crash after the fix. Please let us know if the fix is reasonable.
>

With respect to correctness, the code you are fixing does seem broken.
See inline below.


>
>
> m_table = {0x7fdf23a68ec0, 0x7fdf23a68400, 0x7fdf23a66e80, 0x7fdf23a65900,
>     0x7fdf23960e00, 0x100000000 <repeats 13 times>, 0x7fdf23978640,
> 0x7fdf23977b80, 0x7fdf239745c0, 0x7fdf23a4d600, 0x7fdf23a4cb40,
>     0x7fdf23a75b00, 0x7fdf23a74580, 0x7fdf23972580, 0x7fdf239a76c0,
> 0x7fdf239a5680, 0x7fdf23a7a640, 0x7fdf23a79b80, 0x7fdf23a77b40,
>     0x7fdf2395b800}
>
>
>
> /* Send packet burst on each slave device */
>     for (i = 0; i < slave_count; i++) {
>         if (slave_nb_bufs[i] == 0)
>             continue;
>
>
>         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
>                 bd_tx_q->queue_id, slave_bufs[i],
>                 slave_nb_bufs[i]);
>

A typical failure here would be to transmit no packets.  slave_tx_count = 0.



>
>
>
>         total_tx_count += slave_tx_count;
>
>
>         /* If tx burst fails move packets to end of bufs */
>         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
>             slave_tx_fail_count[i] = slave_nb_bufs[i] -
>                     slave_tx_count;
>             total_tx_fail_count += slave_tx_fail_count[i];
>
>
>             /*
>              * Shift bufs to beginning of array to allow reordering
>              * later
>              */
>             for (j = 0; j < slave_tx_fail_count[i]; j++) {
>                 slave_bufs[i][j] =
>                     slave_bufs[i][(slave_tx_count - 1) + j]; <==== what if
> slave_tx_count == 0, j == 0
>

As you correctly point out, slave_tx_count - 1 would be -1.  Bad news.
So, yes, I agree with your fix.  In your fix, you might notice that
slave_tx_fail_count doesn't need to be an array.  It's local to
if (unlikely(slave_tx_count < slave_nb_bufs[i])) now.


>
>             }
>         }
>     }
>
>
>
> Thanks,
>
> Jia
>
>
>
> *From: *Chas Williams <3chas3@gmail.com>
> *Date: *Saturday, August 18, 2018 at 4:50 PM
> *To: *Jia Yu <jyu@vmware.com>
> *Cc: *"dev@dpdk.org" <dev@dpdk.org>, Declan Doherty <
> declan.doherty@intel.com>, Chas Williams <chas3@att.com>
> *Subject: *Re: [dpdk-dev] [PATCH] net/bonding: fix buf corruption in
> merging un-transmitted packets
>
>
>
>
>
>
>
> On Fri, Aug 17, 2018 at 9:46 PM Jia Yu <jyu@vmware.com> wrote:
>
> When bond slave devices cannot transmit all packets in bufs array,
> tx_burst callback shall merge the un-transmitted packets back to
> bufs array. Recent merge logic introduced a bug which causes
> invalid mbuf addresses being written to bufs array.
>
>
>
> Can you expand on this a bit?  What was the commit?
>
>
>
>
>
> When caller frees the un-transmitted packets, due to invalid addresses,
> application will crash.
>
> The fix is avoid shifting mbufs, and directly write un-transmitted
> packets back to bufs array.
>
> Signed-off-by: Jia Yu <jyu@vmware.com>
> ---
>  drivers/net/bonding/rte_eth_bond_pmd.c | 98
> +++++-----------------------------
>  1 file changed, 14 insertions(+), 84 deletions(-)
>
> diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c
> b/drivers/net/bonding/rte_eth_bond_pmd.c
> index 58f7377..cce973a 100644
> --- a/drivers/net/bonding/rte_eth_bond_pmd.c
> +++ b/drivers/net/bonding/rte_eth_bond_pmd.c
> @@ -303,7 +303,7 @@ bond_ethdev_tx_burst_8023ad_fast_queue(void *queue,
> struct rte_mbuf **bufs,
>         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = {
> 0 };
>         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
>
> -       uint16_t i, j;
> +       uint16_t i;
>
>         if (unlikely(nb_bufs == 0))
>                 return 0;
> @@ -361,31 +361,9 @@ bond_ethdev_tx_burst_8023ad_fast_queue(void *queue,
> struct rte_mbuf **bufs,
>                         slave_tx_fail_count[i] = slave_nb_bufs[i] -
>                                         slave_tx_count;
>                         total_tx_fail_count += slave_tx_fail_count[i];
> -
> -                       /*
> -                        * Shift bufs to beginning of array to allow
> reordering
> -                        * later
> -                        */
> -                       for (j = 0; j < slave_tx_fail_count[i]; j++) {
> -                               slave_bufs[i][j] =
> -                                       slave_bufs[i][(slave_tx_count - 1)
> + j];
> -                       }
> -               }
> -       }
> -
> -       /*
> -        * If there are tx burst failures we move packets to end of bufs to
> -        * preserve expected PMD behaviour of all failed transmitted being
> -        * at the end of the input mbuf array
> -        */
> -       if (unlikely(total_tx_fail_count > 0)) {
> -               int bufs_idx = nb_bufs - total_tx_fail_count - 1;
> -
> -               for (i = 0; i < slave_count; i++) {
> -                       if (slave_tx_fail_count[i] > 0) {
> -                               for (j = 0; j < slave_tx_fail_count[i];
> j++)
> -                                       bufs[bufs_idx++] =
> slave_bufs[i][j];
> -                       }
> +                       memcpy(&bufs[nb_bufs - total_tx_fail_count],
> +                              &slave_bufs[i][slave_tx_count],
> +                              slave_tx_fail_count[i] * sizeof(bufs[0]));
>                 }
>         }
>
> @@ -715,8 +693,8 @@ bond_ethdev_tx_burst_round_robin(void *queue, struct
> rte_mbuf **bufs,
>                                 tx_fail_total += tx_fail_slave;
>
>                                 memcpy(&bufs[nb_pkts - tx_fail_total],
> -
>  &slave_bufs[i][num_tx_slave],
> -                                               tx_fail_slave *
> sizeof(bufs[0]));
> +                                      &slave_bufs[i][num_tx_slave],
> +                                      tx_fail_slave * sizeof(bufs[0]));
>                         }
>                         num_tx_total += num_tx_slave;
>                 }
> @@ -1224,7 +1202,7 @@ bond_ethdev_tx_burst_balance(void *queue, struct
> rte_mbuf **bufs,
>         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = {
> 0 };
>         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
>
> -       uint16_t i, j;
> +       uint16_t i;
>
>         if (unlikely(nb_bufs == 0))
>                 return 0;
> @@ -1268,31 +1246,9 @@ bond_ethdev_tx_burst_balance(void *queue, struct
> rte_mbuf **bufs,
>                         slave_tx_fail_count[i] = slave_nb_bufs[i] -
>                                         slave_tx_count;
>                         total_tx_fail_count += slave_tx_fail_count[i];
> -
> -                       /*
> -                        * Shift bufs to beginning of array to allow
> reordering
> -                        * later
> -                        */
> -                       for (j = 0; j < slave_tx_fail_count[i]; j++) {
> -                               slave_bufs[i][j] =
> -                                       slave_bufs[i][(slave_tx_count - 1)
> + j];
> -                       }
> -               }
> -       }
> -
> -       /*
> -        * If there are tx burst failures we move packets to end of bufs to
> -        * preserve expected PMD behaviour of all failed transmitted being
> -        * at the end of the input mbuf array
> -        */
> -       if (unlikely(total_tx_fail_count > 0)) {
> -               int bufs_idx = nb_bufs - total_tx_fail_count - 1;
> -
> -               for (i = 0; i < slave_count; i++) {
> -                       if (slave_tx_fail_count[i] > 0) {
> -                               for (j = 0; j < slave_tx_fail_count[i];
> j++)
> -                                       bufs[bufs_idx++] =
> slave_bufs[i][j];
> -                       }
> +                       memcpy(&bufs[nb_bufs - total_tx_fail_count],
> +                              &slave_bufs[i][slave_tx_count],
> +                              slave_tx_fail_count[i] * sizeof(bufs[0]));
>                 }
>         }
>
> @@ -1322,7 +1278,7 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct
> rte_mbuf **bufs,
>         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = {
> 0 };
>         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
>
> -       uint16_t i, j;
> +       uint16_t i;
>
>         if (unlikely(nb_bufs == 0))
>                 return 0;
> @@ -1384,35 +1340,9 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct
> rte_mbuf **bufs,
>                                                 slave_tx_count;
>                                 total_tx_fail_count +=
> slave_tx_fail_count[i];
>
> -                               /*
> -                                * Shift bufs to beginning of array to
> allow
> -                                * reordering later
> -                                */
> -                               for (j = 0; j < slave_tx_fail_count[i];
> j++)
> -                                       slave_bufs[i][j] =
> -                                               slave_bufs[i]
> -                                                       [(slave_tx_count -
> 1)
> -                                                       + j];
> -                       }
> -               }
> -
> -               /*
> -                * If there are tx burst failures we move packets to end of
> -                * bufs to preserve expected PMD behaviour of all failed
> -                * transmitted being at the end of the input mbuf array
> -                */
> -               if (unlikely(total_tx_fail_count > 0)) {
> -                       int bufs_idx = nb_bufs - total_tx_fail_count - 1;
> -
> -                       for (i = 0; i < slave_count; i++) {
> -                               if (slave_tx_fail_count[i] > 0) {
> -                                       for (j = 0;
> -                                               j < slave_tx_fail_count[i];
> -                                               j++) {
> -                                               bufs[bufs_idx++] =
> -                                                       slave_bufs[i][j];
> -                                       }
> -                               }
> +                               memcpy(&bufs[nb_bufs -
> total_tx_fail_count],
> +                                      &slave_bufs[i][slave_tx_count],
> +                                      slave_tx_fail_count[i] *
> sizeof(bufs[0]));
>                         }
>                 }
>         }
> --
> 2.7.4
>
>
  
Jia Yu Aug. 20, 2018, 7:02 a.m. UTC | #5
Hi Chas,

Could you help to review the v2 of the patch? I have addressed the “Fixes” requirement in commit message, and modified slave_tx_fail_count’s definition.

Thanks,
Jia

From: Chas Williams <3chas3@gmail.com>
Date: Sunday, August 19, 2018 at 5:07 PM
To: Jia Yu <jyu@vmware.com>
Cc: "dev@dpdk.org" <dev@dpdk.org>, Declan Doherty <declan.doherty@intel.com>, Chas Williams <chas3@att.com>
Subject: Re: [dpdk-dev] [PATCH] net/bonding: fix buf corruption in merging un-transmitted packets



On Sun, Aug 19, 2018 at 6:19 PM Jia Yu <jyu@vmware.com<mailto:jyu@vmware.com>> wrote:
Hi Chas,

Thanks for reviewing the change.

Our application crashed after upgrading to DPDK 18.02, when packet rate is high and bond is configured. It happened because txq contains invalid mbuf addresses after rte_eth_tx_burst call (for example, 0x100000000 repeated 13 times in one core dump). It seems that the invalid addresses came from buf shift code below, so I changed this part of code to earlier version. We don’t see crash after the fix. Please let us know if the fix is reasonable.

With respect to correctness, the code you are fixing does seem broken.
See inline below.


m_table = {0x7fdf23a68ec0, 0x7fdf23a68400, 0x7fdf23a66e80, 0x7fdf23a65900,
    0x7fdf23960e00, 0x100000000 <repeats 13 times>, 0x7fdf23978640, 0x7fdf23977b80, 0x7fdf239745c0, 0x7fdf23a4d600, 0x7fdf23a4cb40,
    0x7fdf23a75b00, 0x7fdf23a74580, 0x7fdf23972580, 0x7fdf239a76c0, 0x7fdf239a5680, 0x7fdf23a7a640, 0x7fdf23a79b80, 0x7fdf23a77b40,
    0x7fdf2395b800}

/* Send packet burst on each slave device */
    for (i = 0; i < slave_count; i++) {
        if (slave_nb_bufs[i] == 0)
            continue;

        slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                bd_tx_q->queue_id, slave_bufs[i],
                slave_nb_bufs[i]);

A typical failure here would be to transmit no packets.  slave_tx_count = 0.




        total_tx_count += slave_tx_count;

        /* If tx burst fails move packets to end of bufs */
        if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
            slave_tx_fail_count[i] = slave_nb_bufs[i] -
                    slave_tx_count;
            total_tx_fail_count += slave_tx_fail_count[i];

            /*
             * Shift bufs to beginning of array to allow reordering
             * later
             */
            for (j = 0; j < slave_tx_fail_count[i]; j++) {
                slave_bufs[i][j] =
                    slave_bufs[i][(slave_tx_count - 1) + j]; <==== what if slave_tx_count == 0, j == 0

As you correctly point out, slave_tx_count - 1 would be -1.  Bad news.
So, yes, I agree with your fix.  In your fix, you might notice that
slave_tx_fail_count doesn't need to be an array.  It's local to
if (unlikely(slave_tx_count < slave_nb_bufs[i])) now.


            }
        }
    }

Thanks,
Jia

From: Chas Williams <3chas3@gmail.com<mailto:3chas3@gmail.com>>
Date: Saturday, August 18, 2018 at 4:50 PM
To: Jia Yu <jyu@vmware.com<mailto:jyu@vmware.com>>
Cc: "dev@dpdk.org<mailto:dev@dpdk.org>" <dev@dpdk.org<mailto:dev@dpdk.org>>, Declan Doherty <declan.doherty@intel.com<mailto:declan.doherty@intel.com>>, Chas Williams <chas3@att.com<mailto:chas3@att.com>>
Subject: Re: [dpdk-dev] [PATCH] net/bonding: fix buf corruption in merging un-transmitted packets



On Fri, Aug 17, 2018 at 9:46 PM Jia Yu <jyu@vmware.com<mailto:jyu@vmware.com>> wrote:
When bond slave devices cannot transmit all packets in bufs array,
tx_burst callback shall merge the un-transmitted packets back to
bufs array. Recent merge logic introduced a bug which causes
invalid mbuf addresses being written to bufs array.

Can you expand on this a bit?  What was the commit?


When caller frees the un-transmitted packets, due to invalid addresses,
application will crash.

The fix is avoid shifting mbufs, and directly write un-transmitted
packets back to bufs array.

Signed-off-by: Jia Yu <jyu@vmware.com<mailto:jyu@vmware.com>>
---
 drivers/net/bonding/rte_eth_bond_pmd.c | 98 +++++-----------------------------
 1 file changed, 14 insertions(+), 84 deletions(-)

diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index 58f7377..cce973a 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -303,7 +303,7 @@ bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
        uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

-       uint16_t i, j;
+       uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;
@@ -361,31 +361,9 @@ bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                        slave_tx_fail_count[i] = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count[i];
-
-                       /*
-                        * Shift bufs to beginning of array to allow reordering
-                        * later
-                        */
-                       for (j = 0; j < slave_tx_fail_count[i]; j++) {
-                               slave_bufs[i][j] =
-                                       slave_bufs[i][(slave_tx_count - 1) + j];
-                       }
-               }
-       }
-
-       /*
-        * If there are tx burst failures we move packets to end of bufs to
-        * preserve expected PMD behaviour of all failed transmitted being
-        * at the end of the input mbuf array
-        */
-       if (unlikely(total_tx_fail_count > 0)) {
-               int bufs_idx = nb_bufs - total_tx_fail_count - 1;
-
-               for (i = 0; i < slave_count; i++) {
-                       if (slave_tx_fail_count[i] > 0) {
-                               for (j = 0; j < slave_tx_fail_count[i]; j++)
-                                       bufs[bufs_idx++] = slave_bufs[i][j];
-                       }
+                       memcpy(&bufs[nb_bufs - total_tx_fail_count],
+                              &slave_bufs[i][slave_tx_count],
+                              slave_tx_fail_count[i] * sizeof(bufs[0]));
                }
        }

@@ -715,8 +693,8 @@ bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
-                                               &slave_bufs[i][num_tx_slave],
-                                               tx_fail_slave * sizeof(bufs[0]));
+                                      &slave_bufs[i][num_tx_slave],
+                                      tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
@@ -1224,7 +1202,7 @@ bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
        uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

-       uint16_t i, j;
+       uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;
@@ -1268,31 +1246,9 @@ bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                        slave_tx_fail_count[i] = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count[i];
-
-                       /*
-                        * Shift bufs to beginning of array to allow reordering
-                        * later
-                        */
-                       for (j = 0; j < slave_tx_fail_count[i]; j++) {
-                               slave_bufs[i][j] =
-                                       slave_bufs[i][(slave_tx_count - 1) + j];
-                       }
-               }
-       }
-
-       /*
-        * If there are tx burst failures we move packets to end of bufs to
-        * preserve expected PMD behaviour of all failed transmitted being
-        * at the end of the input mbuf array
-        */
-       if (unlikely(total_tx_fail_count > 0)) {
-               int bufs_idx = nb_bufs - total_tx_fail_count - 1;
-
-               for (i = 0; i < slave_count; i++) {
-                       if (slave_tx_fail_count[i] > 0) {
-                               for (j = 0; j < slave_tx_fail_count[i]; j++)
-                                       bufs[bufs_idx++] = slave_bufs[i][j];
-                       }
+                       memcpy(&bufs[nb_bufs - total_tx_fail_count],
+                              &slave_bufs[i][slave_tx_count],
+                              slave_tx_fail_count[i] * sizeof(bufs[0]));
                }
        }

@@ -1322,7 +1278,7 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
        uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

-       uint16_t i, j;
+       uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;
@@ -1384,35 +1340,9 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                                                slave_tx_count;
                                total_tx_fail_count += slave_tx_fail_count[i];

-                               /*
-                                * Shift bufs to beginning of array to allow
-                                * reordering later
-                                */
-                               for (j = 0; j < slave_tx_fail_count[i]; j++)
-                                       slave_bufs[i][j] =
-                                               slave_bufs[i]
-                                                       [(slave_tx_count - 1)
-                                                       + j];
-                       }
-               }
-
-               /*
-                * If there are tx burst failures we move packets to end of
-                * bufs to preserve expected PMD behaviour of all failed
-                * transmitted being at the end of the input mbuf array
-                */
-               if (unlikely(total_tx_fail_count > 0)) {
-                       int bufs_idx = nb_bufs - total_tx_fail_count - 1;
-
-                       for (i = 0; i < slave_count; i++) {
-                               if (slave_tx_fail_count[i] > 0) {
-                                       for (j = 0;
-                                               j < slave_tx_fail_count[i];
-                                               j++) {
-                                               bufs[bufs_idx++] =
-                                                       slave_bufs[i][j];
-                                       }
-                               }
+                               memcpy(&bufs[nb_bufs - total_tx_fail_count],
+                                      &slave_bufs[i][slave_tx_count],
+                                      slave_tx_fail_count[i] * sizeof(bufs[0]));
                        }
                }
        }
--
2.7.4
  
Chas Williams Aug. 20, 2018, 2:02 p.m. UTC | #6
On Mon, Aug 20, 2018 at 3:02 AM Jia Yu <jyu@vmware.com> wrote:

> Hi Chas,
>
>
>
> Could you help to review the v2 of the patch? I have addressed the “Fixes”
> requirement in commit message, and modified slave_tx_fail_count’s
> definition.
>

Looks fine to me.  Thanks for the patch!


>
>
> Thanks,
>
> Jia
>
>
>
> *From: *Chas Williams <3chas3@gmail.com>
> *Date: *Sunday, August 19, 2018 at 5:07 PM
> *To: *Jia Yu <jyu@vmware.com>
> *Cc: *"dev@dpdk.org" <dev@dpdk.org>, Declan Doherty <
> declan.doherty@intel.com>, Chas Williams <chas3@att.com>
> *Subject: *Re: [dpdk-dev] [PATCH] net/bonding: fix buf corruption in
> merging un-transmitted packets
>
>
>
>
>
>
>
> On Sun, Aug 19, 2018 at 6:19 PM Jia Yu <jyu@vmware.com> wrote:
>
> Hi Chas,
>
>
>
> Thanks for reviewing the change.
>
>
>
> Our application crashed after upgrading to DPDK 18.02, when packet rate is
> high and bond is configured. It happened because txq contains invalid mbuf
> addresses after rte_eth_tx_burst call (for example, 0x100000000 repeated 13
> times in one core dump). It seems that the invalid addresses came from buf
> shift code below, so I changed this part of code to earlier version. We
> don’t see crash after the fix. Please let us know if the fix is reasonable.
>
>
>
> With respect to correctness, the code you are fixing does seem broken.
>
> See inline below.
>
>
>
>
>
> m_table = {0x7fdf23a68ec0, 0x7fdf23a68400, 0x7fdf23a66e80, 0x7fdf23a65900,
>     0x7fdf23960e00, 0x100000000 <repeats 13 times>, 0x7fdf23978640,
> 0x7fdf23977b80, 0x7fdf239745c0, 0x7fdf23a4d600, 0x7fdf23a4cb40,
>     0x7fdf23a75b00, 0x7fdf23a74580, 0x7fdf23972580, 0x7fdf239a76c0,
> 0x7fdf239a5680, 0x7fdf23a7a640, 0x7fdf23a79b80, 0x7fdf23a77b40,
>     0x7fdf2395b800}
>
>
>
> /* Send packet burst on each slave device */
>     for (i = 0; i < slave_count; i++) {
>         if (slave_nb_bufs[i] == 0)
>             continue;
>
>
>         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
>                 bd_tx_q->queue_id, slave_bufs[i],
>                 slave_nb_bufs[i]);
>
>
>
> A typical failure here would be to transmit no packets.  slave_tx_count =
> 0.
>
>
>
>
>
>
>
>
>         total_tx_count += slave_tx_count;
>
>
>         /* If tx burst fails move packets to end of bufs */
>         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
>             slave_tx_fail_count[i] = slave_nb_bufs[i] -
>                     slave_tx_count;
>             total_tx_fail_count += slave_tx_fail_count[i];
>
>
>             /*
>              * Shift bufs to beginning of array to allow reordering
>              * later
>              */
>             for (j = 0; j < slave_tx_fail_count[i]; j++) {
>                 slave_bufs[i][j] =
>                     slave_bufs[i][(slave_tx_count - 1) + j]; <==== what if
> slave_tx_count == 0, j == 0
>
>
>
> As you correctly point out, slave_tx_count - 1 would be -1.  Bad news.
>
> So, yes, I agree with your fix.  In your fix, you might notice that
>
> slave_tx_fail_count doesn't need to be an array.  It's local to
>
> if (unlikely(slave_tx_count < slave_nb_bufs[i])) now.
>
>
>
>
>             }
>         }
>     }
>
>
>
> Thanks,
>
> Jia
>
>
>
> *From: *Chas Williams <3chas3@gmail.com>
> *Date: *Saturday, August 18, 2018 at 4:50 PM
> *To: *Jia Yu <jyu@vmware.com>
> *Cc: *"dev@dpdk.org" <dev@dpdk.org>, Declan Doherty <
> declan.doherty@intel.com>, Chas Williams <chas3@att.com>
> *Subject: *Re: [dpdk-dev] [PATCH] net/bonding: fix buf corruption in
> merging un-transmitted packets
>
>
>
>
>
>
>
> On Fri, Aug 17, 2018 at 9:46 PM Jia Yu <jyu@vmware.com> wrote:
>
> When bond slave devices cannot transmit all packets in bufs array,
> tx_burst callback shall merge the un-transmitted packets back to
> bufs array. Recent merge logic introduced a bug which causes
> invalid mbuf addresses being written to bufs array.
>
>
>
> Can you expand on this a bit?  What was the commit?
>
>
>
>
>
> When caller frees the un-transmitted packets, due to invalid addresses,
> application will crash.
>
> The fix is avoid shifting mbufs, and directly write un-transmitted
> packets back to bufs array.
>
> Signed-off-by: Jia Yu <jyu@vmware.com>
> ---
>  drivers/net/bonding/rte_eth_bond_pmd.c | 98
> +++++-----------------------------
>  1 file changed, 14 insertions(+), 84 deletions(-)
>
> diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c
> b/drivers/net/bonding/rte_eth_bond_pmd.c
> index 58f7377..cce973a 100644
> --- a/drivers/net/bonding/rte_eth_bond_pmd.c
> +++ b/drivers/net/bonding/rte_eth_bond_pmd.c
> @@ -303,7 +303,7 @@ bond_ethdev_tx_burst_8023ad_fast_queue(void *queue,
> struct rte_mbuf **bufs,
>         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = {
> 0 };
>         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
>
> -       uint16_t i, j;
> +       uint16_t i;
>
>         if (unlikely(nb_bufs == 0))
>                 return 0;
> @@ -361,31 +361,9 @@ bond_ethdev_tx_burst_8023ad_fast_queue(void *queue,
> struct rte_mbuf **bufs,
>                         slave_tx_fail_count[i] = slave_nb_bufs[i] -
>                                         slave_tx_count;
>                         total_tx_fail_count += slave_tx_fail_count[i];
> -
> -                       /*
> -                        * Shift bufs to beginning of array to allow
> reordering
> -                        * later
> -                        */
> -                       for (j = 0; j < slave_tx_fail_count[i]; j++) {
> -                               slave_bufs[i][j] =
> -                                       slave_bufs[i][(slave_tx_count - 1)
> + j];
> -                       }
> -               }
> -       }
> -
> -       /*
> -        * If there are tx burst failures we move packets to end of bufs to
> -        * preserve expected PMD behaviour of all failed transmitted being
> -        * at the end of the input mbuf array
> -        */
> -       if (unlikely(total_tx_fail_count > 0)) {
> -               int bufs_idx = nb_bufs - total_tx_fail_count - 1;
> -
> -               for (i = 0; i < slave_count; i++) {
> -                       if (slave_tx_fail_count[i] > 0) {
> -                               for (j = 0; j < slave_tx_fail_count[i];
> j++)
> -                                       bufs[bufs_idx++] =
> slave_bufs[i][j];
> -                       }
> +                       memcpy(&bufs[nb_bufs - total_tx_fail_count],
> +                              &slave_bufs[i][slave_tx_count],
> +                              slave_tx_fail_count[i] * sizeof(bufs[0]));
>                 }
>         }
>
> @@ -715,8 +693,8 @@ bond_ethdev_tx_burst_round_robin(void *queue, struct
> rte_mbuf **bufs,
>                                 tx_fail_total += tx_fail_slave;
>
>                                 memcpy(&bufs[nb_pkts - tx_fail_total],
> -
>  &slave_bufs[i][num_tx_slave],
> -                                               tx_fail_slave *
> sizeof(bufs[0]));
> +                                      &slave_bufs[i][num_tx_slave],
> +                                      tx_fail_slave * sizeof(bufs[0]));
>                         }
>                         num_tx_total += num_tx_slave;
>                 }
> @@ -1224,7 +1202,7 @@ bond_ethdev_tx_burst_balance(void *queue, struct
> rte_mbuf **bufs,
>         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = {
> 0 };
>         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
>
> -       uint16_t i, j;
> +       uint16_t i;
>
>         if (unlikely(nb_bufs == 0))
>                 return 0;
> @@ -1268,31 +1246,9 @@ bond_ethdev_tx_burst_balance(void *queue, struct
> rte_mbuf **bufs,
>                         slave_tx_fail_count[i] = slave_nb_bufs[i] -
>                                         slave_tx_count;
>                         total_tx_fail_count += slave_tx_fail_count[i];
> -
> -                       /*
> -                        * Shift bufs to beginning of array to allow
> reordering
> -                        * later
> -                        */
> -                       for (j = 0; j < slave_tx_fail_count[i]; j++) {
> -                               slave_bufs[i][j] =
> -                                       slave_bufs[i][(slave_tx_count - 1)
> + j];
> -                       }
> -               }
> -       }
> -
> -       /*
> -        * If there are tx burst failures we move packets to end of bufs to
> -        * preserve expected PMD behaviour of all failed transmitted being
> -        * at the end of the input mbuf array
> -        */
> -       if (unlikely(total_tx_fail_count > 0)) {
> -               int bufs_idx = nb_bufs - total_tx_fail_count - 1;
> -
> -               for (i = 0; i < slave_count; i++) {
> -                       if (slave_tx_fail_count[i] > 0) {
> -                               for (j = 0; j < slave_tx_fail_count[i];
> j++)
> -                                       bufs[bufs_idx++] =
> slave_bufs[i][j];
> -                       }
> +                       memcpy(&bufs[nb_bufs - total_tx_fail_count],
> +                              &slave_bufs[i][slave_tx_count],
> +                              slave_tx_fail_count[i] * sizeof(bufs[0]));
>                 }
>         }
>
> @@ -1322,7 +1278,7 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct
> rte_mbuf **bufs,
>         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = {
> 0 };
>         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
>
> -       uint16_t i, j;
> +       uint16_t i;
>
>         if (unlikely(nb_bufs == 0))
>                 return 0;
> @@ -1384,35 +1340,9 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct
> rte_mbuf **bufs,
>                                                 slave_tx_count;
>                                 total_tx_fail_count +=
> slave_tx_fail_count[i];
>
> -                               /*
> -                                * Shift bufs to beginning of array to
> allow
> -                                * reordering later
> -                                */
> -                               for (j = 0; j < slave_tx_fail_count[i];
> j++)
> -                                       slave_bufs[i][j] =
> -                                               slave_bufs[i]
> -                                                       [(slave_tx_count -
> 1)
> -                                                       + j];
> -                       }
> -               }
> -
> -               /*
> -                * If there are tx burst failures we move packets to end of
> -                * bufs to preserve expected PMD behaviour of all failed
> -                * transmitted being at the end of the input mbuf array
> -                */
> -               if (unlikely(total_tx_fail_count > 0)) {
> -                       int bufs_idx = nb_bufs - total_tx_fail_count - 1;
> -
> -                       for (i = 0; i < slave_count; i++) {
> -                               if (slave_tx_fail_count[i] > 0) {
> -                                       for (j = 0;
> -                                               j < slave_tx_fail_count[i];
> -                                               j++) {
> -                                               bufs[bufs_idx++] =
> -                                                       slave_bufs[i][j];
> -                                       }
> -                               }
> +                               memcpy(&bufs[nb_bufs -
> total_tx_fail_count],
> +                                      &slave_bufs[i][slave_tx_count],
> +                                      slave_tx_fail_count[i] *
> sizeof(bufs[0]));
>                         }
>                 }
>         }
> --
> 2.7.4
>
>
  

Patch

diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index 58f7377..cce973a 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -303,7 +303,7 @@  bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
 	uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
 	uint16_t total_tx_count = 0, total_tx_fail_count = 0;
 
-	uint16_t i, j;
+	uint16_t i;
 
 	if (unlikely(nb_bufs == 0))
 		return 0;
@@ -361,31 +361,9 @@  bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
 			slave_tx_fail_count[i] = slave_nb_bufs[i] -
 					slave_tx_count;
 			total_tx_fail_count += slave_tx_fail_count[i];
-
-			/*
-			 * Shift bufs to beginning of array to allow reordering
-			 * later
-			 */
-			for (j = 0; j < slave_tx_fail_count[i]; j++) {
-				slave_bufs[i][j] =
-					slave_bufs[i][(slave_tx_count - 1) + j];
-			}
-		}
-	}
-
-	/*
-	 * If there are tx burst failures we move packets to end of bufs to
-	 * preserve expected PMD behaviour of all failed transmitted being
-	 * at the end of the input mbuf array
-	 */
-	if (unlikely(total_tx_fail_count > 0)) {
-		int bufs_idx = nb_bufs - total_tx_fail_count - 1;
-
-		for (i = 0; i < slave_count; i++) {
-			if (slave_tx_fail_count[i] > 0) {
-				for (j = 0; j < slave_tx_fail_count[i]; j++)
-					bufs[bufs_idx++] = slave_bufs[i][j];
-			}
+			memcpy(&bufs[nb_bufs - total_tx_fail_count],
+			       &slave_bufs[i][slave_tx_count],
+			       slave_tx_fail_count[i] * sizeof(bufs[0]));
 		}
 	}
 
@@ -715,8 +693,8 @@  bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
 				tx_fail_total += tx_fail_slave;
 
 				memcpy(&bufs[nb_pkts - tx_fail_total],
-						&slave_bufs[i][num_tx_slave],
-						tx_fail_slave * sizeof(bufs[0]));
+				       &slave_bufs[i][num_tx_slave],
+				       tx_fail_slave * sizeof(bufs[0]));
 			}
 			num_tx_total += num_tx_slave;
 		}
@@ -1224,7 +1202,7 @@  bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
 	uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
 	uint16_t total_tx_count = 0, total_tx_fail_count = 0;
 
-	uint16_t i, j;
+	uint16_t i;
 
 	if (unlikely(nb_bufs == 0))
 		return 0;
@@ -1268,31 +1246,9 @@  bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
 			slave_tx_fail_count[i] = slave_nb_bufs[i] -
 					slave_tx_count;
 			total_tx_fail_count += slave_tx_fail_count[i];
-
-			/*
-			 * Shift bufs to beginning of array to allow reordering
-			 * later
-			 */
-			for (j = 0; j < slave_tx_fail_count[i]; j++) {
-				slave_bufs[i][j] =
-					slave_bufs[i][(slave_tx_count - 1) + j];
-			}
-		}
-	}
-
-	/*
-	 * If there are tx burst failures we move packets to end of bufs to
-	 * preserve expected PMD behaviour of all failed transmitted being
-	 * at the end of the input mbuf array
-	 */
-	if (unlikely(total_tx_fail_count > 0)) {
-		int bufs_idx = nb_bufs - total_tx_fail_count - 1;
-
-		for (i = 0; i < slave_count; i++) {
-			if (slave_tx_fail_count[i] > 0) {
-				for (j = 0; j < slave_tx_fail_count[i]; j++)
-					bufs[bufs_idx++] = slave_bufs[i][j];
-			}
+			memcpy(&bufs[nb_bufs - total_tx_fail_count],
+			       &slave_bufs[i][slave_tx_count],
+			       slave_tx_fail_count[i] * sizeof(bufs[0]));
 		}
 	}
 
@@ -1322,7 +1278,7 @@  bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
 	uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
 	uint16_t total_tx_count = 0, total_tx_fail_count = 0;
 
-	uint16_t i, j;
+	uint16_t i;
 
 	if (unlikely(nb_bufs == 0))
 		return 0;
@@ -1384,35 +1340,9 @@  bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
 						slave_tx_count;
 				total_tx_fail_count += slave_tx_fail_count[i];
 
-				/*
-				 * Shift bufs to beginning of array to allow
-				 * reordering later
-				 */
-				for (j = 0; j < slave_tx_fail_count[i]; j++)
-					slave_bufs[i][j] =
-						slave_bufs[i]
-							[(slave_tx_count - 1)
-							+ j];
-			}
-		}
-
-		/*
-		 * If there are tx burst failures we move packets to end of
-		 * bufs to preserve expected PMD behaviour of all failed
-		 * transmitted being at the end of the input mbuf array
-		 */
-		if (unlikely(total_tx_fail_count > 0)) {
-			int bufs_idx = nb_bufs - total_tx_fail_count - 1;
-
-			for (i = 0; i < slave_count; i++) {
-				if (slave_tx_fail_count[i] > 0) {
-					for (j = 0;
-						j < slave_tx_fail_count[i];
-						j++) {
-						bufs[bufs_idx++] =
-							slave_bufs[i][j];
-					}
-				}
+				memcpy(&bufs[nb_bufs - total_tx_fail_count],
+				       &slave_bufs[i][slave_tx_count],
+				       slave_tx_fail_count[i] * sizeof(bufs[0]));
 			}
 		}
 	}