[v2] net/tap: fix potential buffer overrun
Checks
Commit Message
When secondary to primary process synchronization occours
there is no check for number of fds which could cause buffer overrun.
Bugzilla ID: 252
Fixes: c9aa56edec8e ("net/tap: access primary process queues from secondary")
Cc: rasland@mellanox.com
Cc: stable@dpdk.org
Signed-off-by: Herakliusz Lipiec <herakliusz.lipiec@intel.com>
---
drivers/net/tap/rte_eth_tap.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
Comments
On 25-Apr-19 6:17 PM, Herakliusz Lipiec wrote:
> When secondary to primary process synchronization occours
> there is no check for number of fds which could cause buffer overrun.
>
> Bugzilla ID: 252
> Fixes: c9aa56edec8e ("net/tap: access primary process queues from secondary")
> Cc: rasland@mellanox.com
> Cc: stable@dpdk.org
>
> Signed-off-by: Herakliusz Lipiec <herakliusz.lipiec@intel.com>
> ---
> drivers/net/tap/rte_eth_tap.c | 13 +++++++++++--
> 1 file changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index e9fda8cf6..4a2ef5ce7 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -2111,6 +2111,10 @@ tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
> TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);
>
> /* Attach the queues from received file descriptors */
> + if (reply_param->rxq_count + reply_param->txq_count != reply->num_fds) {
> + TAP_LOG(ERR, "Unexpected number of fds received");
> + return -1;
> + }
> dev->data->nb_rx_queues = reply_param->rxq_count;
> dev->data->nb_tx_queues = reply_param->txq_count;
> fd_iterator = 0;
> @@ -2151,12 +2155,16 @@ tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
> /* Fill file descriptors for all queues */
> reply.num_fds = 0;
> reply_param->rxq_count = 0;
> + if (dev->data->nb_rx_queues + dev->data->nb_tx_queues >
> + RTE_MP_MAX_FD_NUM){
> + TAP_LOG(ERR, "Number of rx/tx queues exceeds max number of fds");
> + return -1;
> + }
> for (queue = 0; queue < dev->data->nb_rx_queues; queue++) {
> reply.fds[reply.num_fds++] = process_private->rxq_fds[queue];
> reply_param->rxq_count++;
> }
> RTE_ASSERT(reply_param->rxq_count == dev->data->nb_rx_queues);
> - RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues);
> RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
>
> reply_param->txq_count = 0;
> @@ -2164,7 +2172,8 @@ tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
> reply.fds[reply.num_fds++] = process_private->txq_fds[queue];
> reply_param->txq_count++;
> }
> -
> + RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues);
> + RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
> /* Send reply */
> strlcpy(reply.name, request->name, sizeof(reply.name));
> strlcpy(reply_param->port_name, request_param->port_name,
>
Reviewed-by: Anatoly Burakov <anatoly.burakov@intel.com>
On 4/25/2019 6:17 PM, Herakliusz Lipiec wrote:
> When secondary to primary process synchronization occours
> there is no check for number of fds which could cause buffer overrun.
>
> Bugzilla ID: 252
> Fixes: c9aa56edec8e ("net/tap: access primary process queues from secondary")
> Cc: rasland@mellanox.com
> Cc: stable@dpdk.org
>
> Signed-off-by: Herakliusz Lipiec <herakliusz.lipiec@intel.com>
> ---
> drivers/net/tap/rte_eth_tap.c | 13 +++++++++++--
> 1 file changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index e9fda8cf6..4a2ef5ce7 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -2111,6 +2111,10 @@ tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
> TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);
>
> /* Attach the queues from received file descriptors */
> + if (reply_param->rxq_count + reply_param->txq_count != reply->num_fds) {
> + TAP_LOG(ERR, "Unexpected number of fds received");
> + return -1;
> + }
Is there a way this can happen? If not I suggest remove the check.
> dev->data->nb_rx_queues = reply_param->rxq_count;
> dev->data->nb_tx_queues = reply_param->txq_count;
> fd_iterator = 0;
> @@ -2151,12 +2155,16 @@ tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
> /* Fill file descriptors for all queues */
> reply.num_fds = 0;
> reply_param->rxq_count = 0;
> + if (dev->data->nb_rx_queues + dev->data->nb_tx_queues >
> + RTE_MP_MAX_FD_NUM){
> + TAP_LOG(ERR, "Number of rx/tx queues exceeds max number of fds");
> + return -1;
> + }
+1 for the check.
But what it does when return "-1", not send a message at all? If so would it be
better to send and error message back instead of waiting the receiver to timeout?
> for (queue = 0; queue < dev->data->nb_rx_queues; queue++) {
> reply.fds[reply.num_fds++] = process_private->rxq_fds[queue];
> reply_param->rxq_count++;
> }
> RTE_ASSERT(reply_param->rxq_count == dev->data->nb_rx_queues);
> - RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues);
> RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
Since there is dynamic check above for "RTE_MP_MAX_FD_NUM", we can remove this
assert I think.
>
> reply_param->txq_count = 0;
> @@ -2164,7 +2172,8 @@ tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
> reply.fds[reply.num_fds++] = process_private->txq_fds[queue];
> reply_param->txq_count++;
> }
> -
> + RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues);
> + RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
Same for this assert, we can remove it.
And as syntax, please keep the empty line before next block.
> /* Send reply */
> strlcpy(reply.name, request->name, sizeof(reply.name));
> strlcpy(reply_param->port_name, request_param->port_name,
>
> On Apr 25, 2019, at 10:17 AM, Lipiec, Herakliusz <herakliusz.lipiec@intel.com> wrote:
>
> When secondary to primary process synchronization occours
> there is no check for number of fds which could cause buffer overrun.
>
> Bugzilla ID: 252
> Fixes: c9aa56edec8e ("net/tap: access primary process queues from secondary")
> Cc: rasland@mellanox.com
> Cc: stable@dpdk.org
>
> Signed-off-by: Herakliusz Lipiec <herakliusz.lipiec@intel.com>
> ---
> drivers/net/tap/rte_eth_tap.c | 13 +++++++++++--
> 1 file changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index e9fda8cf6..4a2ef5ce7 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -2111,6 +2111,10 @@ tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
> TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);
>
> /* Attach the queues from received file descriptors */
> + if (reply_param->rxq_count + reply_param->txq_count != reply->num_fds) {
> + TAP_LOG(ERR, "Unexpected number of fds received");
> + return -1;
> + }
This check is reasonable, but why is this being done on the receive side and not checked on the send side. There may need to be a check for num_fds being zero or greater than 8 as that is the limit to the number of FDs that can be handle by the IPC.
In a different thread for Bug-258 we need to return an indicator that the receive side detected an error by returning 0 for num_fds and I have patch for that one.
https://bugs.dpdk.org/show_bug.cgi?id=258
I would have expected the sender to make sure they match and then this test is not needed, but a test for num_fds being zero or > 8 is needed if you want to detect the failure here or not if you do not care as long as nb_[r/t]x_queues is zero too.
> dev->data->nb_rx_queues = reply_param->rxq_count;
> dev->data->nb_tx_queues = reply_param->txq_count;
> fd_iterator = 0;
> @@ -2151,12 +2155,16 @@ tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
> /* Fill file descriptors for all queues */
> reply.num_fds = 0;
> reply_param->rxq_count = 0;
> + if (dev->data->nb_rx_queues + dev->data->nb_tx_queues >
> + RTE_MP_MAX_FD_NUM){
> + TAP_LOG(ERR, "Number of rx/tx queues exceeds max number of fds");
> + return -1;
> + }
> for (queue = 0; queue < dev->data->nb_rx_queues; queue++) {
> reply.fds[reply.num_fds++] = process_private->rxq_fds[queue];
> reply_param->rxq_count++;
> }
> RTE_ASSERT(reply_param->rxq_count == dev->data->nb_rx_queues);
> - RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues);
> RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
>
> reply_param->txq_count = 0;
> @@ -2164,7 +2172,8 @@ tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
> reply.fds[reply.num_fds++] = process_private->txq_fds[queue];
> reply_param->txq_count++;
> }
> -
> + RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues);
> + RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
> /* Send reply */
> strlcpy(reply.name, request->name, sizeof(reply.name));
> strlcpy(reply_param->port_name, request_param->port_name,
> --
> 2.17.2
>
Regards,
Keith
On 29-Apr-19 2:53 PM, Ferruh Yigit wrote:
> On 4/25/2019 6:17 PM, Herakliusz Lipiec wrote:
>> When secondary to primary process synchronization occours
>> there is no check for number of fds which could cause buffer overrun.
>>
>> Bugzilla ID: 252
>> Fixes: c9aa56edec8e ("net/tap: access primary process queues from secondary")
>> Cc: rasland@mellanox.com
>> Cc: stable@dpdk.org
>>
>> Signed-off-by: Herakliusz Lipiec <herakliusz.lipiec@intel.com>
>> ---
>> drivers/net/tap/rte_eth_tap.c | 13 +++++++++++--
>> 1 file changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
>> index e9fda8cf6..4a2ef5ce7 100644
>> --- a/drivers/net/tap/rte_eth_tap.c
>> +++ b/drivers/net/tap/rte_eth_tap.c
>> @@ -2111,6 +2111,10 @@ tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
>> TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);
>>
>> /* Attach the queues from received file descriptors */
>> + if (reply_param->rxq_count + reply_param->txq_count != reply->num_fds) {
>> + TAP_LOG(ERR, "Unexpected number of fds received");
>> + return -1;
>> + }
>
> Is there a way this can happen? If not I suggest remove the check.
Normally no, but theoretically this can trigger a buffer overrun if not
checked. After all, something could either fail on the other side, or
someone could send a fake message :) This data is coming from an
external source, so we need to sanity-check it.
>
>> dev->data->nb_rx_queues = reply_param->rxq_count;
>> dev->data->nb_tx_queues = reply_param->txq_count;
>> fd_iterator = 0;
>> @@ -2151,12 +2155,16 @@ tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
>> /* Fill file descriptors for all queues */
>> reply.num_fds = 0;
>> reply_param->rxq_count = 0;
>> + if (dev->data->nb_rx_queues + dev->data->nb_tx_queues >
>> + RTE_MP_MAX_FD_NUM){
>> + TAP_LOG(ERR, "Number of rx/tx queues exceeds max number of fds");
>> + return -1;
>> + }
>
> +1 for the check.
> But what it does when return "-1", not send a message at all? If so would it be
> better to send and error message back instead of waiting the receiver to timeout?
There will be a different patch fixing this specific issue. Probably
this patch would need to be rebased on top of that.
>
>> for (queue = 0; queue < dev->data->nb_rx_queues; queue++) {
>> reply.fds[reply.num_fds++] = process_private->rxq_fds[queue];
>> reply_param->rxq_count++;
>> }
>> RTE_ASSERT(reply_param->rxq_count == dev->data->nb_rx_queues);
>> - RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues);
>> RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
>
> Since there is dynamic check above for "RTE_MP_MAX_FD_NUM", we can remove this
> assert I think.
>
>>
>> reply_param->txq_count = 0;
>> @@ -2164,7 +2172,8 @@ tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
>> reply.fds[reply.num_fds++] = process_private->txq_fds[queue];
>> reply_param->txq_count++;
>> }
>> -
>> + RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues);
>> + RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
>
> Same for this assert, we can remove it.
> And as syntax, please keep the empty line before next block.
>
>> /* Send reply */
>> strlcpy(reply.name, request->name, sizeof(reply.name));
>> strlcpy(reply_param->port_name, request_param->port_name,
>>
>
>
On 29-Apr-19 2:58 PM, Wiles, Keith wrote:
>
>
>> On Apr 25, 2019, at 10:17 AM, Lipiec, Herakliusz <herakliusz.lipiec@intel.com> wrote:
>>
>> When secondary to primary process synchronization occours
>> there is no check for number of fds which could cause buffer overrun.
>>
>> Bugzilla ID: 252
>> Fixes: c9aa56edec8e ("net/tap: access primary process queues from secondary")
>> Cc: rasland@mellanox.com
>> Cc: stable@dpdk.org
>>
>> Signed-off-by: Herakliusz Lipiec <herakliusz.lipiec@intel.com>
>> ---
>> drivers/net/tap/rte_eth_tap.c | 13 +++++++++++--
>> 1 file changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
>> index e9fda8cf6..4a2ef5ce7 100644
>> --- a/drivers/net/tap/rte_eth_tap.c
>> +++ b/drivers/net/tap/rte_eth_tap.c
>> @@ -2111,6 +2111,10 @@ tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
>> TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);
>>
>> /* Attach the queues from received file descriptors */
>> + if (reply_param->rxq_count + reply_param->txq_count != reply->num_fds) {
>> + TAP_LOG(ERR, "Unexpected number of fds received");
>> + return -1;
>> + }
>
> This check is reasonable, but why is this being done on the receive side and not checked on the send side. There may need to be a check for num_fds being zero or greater than 8 as that is the limit to the number of FDs that can be handle by the IPC.
It is done below on the send side as well. This check is for
sanity-checking external input. It's a socket, so anything (with
matching UID) can write to it.
On 4/29/2019 3:02 PM, Burakov, Anatoly wrote:
> On 29-Apr-19 2:53 PM, Ferruh Yigit wrote:
>> On 4/25/2019 6:17 PM, Herakliusz Lipiec wrote:
>>> When secondary to primary process synchronization occours
>>> there is no check for number of fds which could cause buffer overrun.
>>>
>>> Bugzilla ID: 252
>>> Fixes: c9aa56edec8e ("net/tap: access primary process queues from secondary")
>>> Cc: rasland@mellanox.com
>>> Cc: stable@dpdk.org
>>>
>>> Signed-off-by: Herakliusz Lipiec <herakliusz.lipiec@intel.com>
>>> ---
>>> drivers/net/tap/rte_eth_tap.c | 13 +++++++++++--
>>> 1 file changed, 11 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
>>> index e9fda8cf6..4a2ef5ce7 100644
>>> --- a/drivers/net/tap/rte_eth_tap.c
>>> +++ b/drivers/net/tap/rte_eth_tap.c
>>> @@ -2111,6 +2111,10 @@ tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
>>> TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);
>>>
>>> /* Attach the queues from received file descriptors */
>>> + if (reply_param->rxq_count + reply_param->txq_count != reply->num_fds) {
>>> + TAP_LOG(ERR, "Unexpected number of fds received");
>>> + return -1;
>>> + }
>>
>> Is there a way this can happen? If not I suggest remove the check.
>
> Normally no, but theoretically this can trigger a buffer overrun if not
> checked. After all, something could either fail on the other side, or
> someone could send a fake message :) This data is coming from an
> external source, so we need to sanity-check it.
Both sender and receiver are in the same driver, primary and secondary
application paths, there is no communication with external source,
and I don't see any code path that will cause this failure.
After above said, this is just an additional reasonable check and not in the
data path, so having this won't hurt, I don't object to have it.
>
>>
>>> dev->data->nb_rx_queues = reply_param->rxq_count;
>>> dev->data->nb_tx_queues = reply_param->txq_count;
>>> fd_iterator = 0;
>>> @@ -2151,12 +2155,16 @@ tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
>>> /* Fill file descriptors for all queues */
>>> reply.num_fds = 0;
>>> reply_param->rxq_count = 0;
>>> + if (dev->data->nb_rx_queues + dev->data->nb_tx_queues >
>>> + RTE_MP_MAX_FD_NUM){
>>> + TAP_LOG(ERR, "Number of rx/tx queues exceeds max number of fds");
>>> + return -1;
>>> + }
>>
>> +1 for the check.
>> But what it does when return "-1", not send a message at all? If so would it be
>> better to send and error message back instead of waiting the receiver to timeout?
>
> There will be a different patch fixing this specific issue. Probably
> this patch would need to be rebased on top of that.
+1 to fix this issue but I assume it won't be for this release, so can get this
patch now and this part can be updated with mentioned patch.
>
>>
>>> for (queue = 0; queue < dev->data->nb_rx_queues; queue++) {
>>> reply.fds[reply.num_fds++] = process_private->rxq_fds[queue];
>>> reply_param->rxq_count++;
>>> }
>>> RTE_ASSERT(reply_param->rxq_count == dev->data->nb_rx_queues);
>>> - RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues);
>>> RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
>>
>> Since there is dynamic check above for "RTE_MP_MAX_FD_NUM", we can remove this
>> assert I think.
>>
>>>
>>> reply_param->txq_count = 0;
>>> @@ -2164,7 +2172,8 @@ tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
>>> reply.fds[reply.num_fds++] = process_private->txq_fds[queue];
>>> reply_param->txq_count++;
>>> }
>>> -
>>> + RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues);
>>> + RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
>>
>> Same for this assert, we can remove it.
>> And as syntax, please keep the empty line before next block.
>>
>>> /* Send reply */
>>> strlcpy(reply.name, request->name, sizeof(reply.name));
>>> strlcpy(reply_param->port_name, request_param->port_name,
>>>
>>
>>
>
>
@@ -2111,6 +2111,10 @@ tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);
/* Attach the queues from received file descriptors */
+ if (reply_param->rxq_count + reply_param->txq_count != reply->num_fds) {
+ TAP_LOG(ERR, "Unexpected number of fds received");
+ return -1;
+ }
dev->data->nb_rx_queues = reply_param->rxq_count;
dev->data->nb_tx_queues = reply_param->txq_count;
fd_iterator = 0;
@@ -2151,12 +2155,16 @@ tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
/* Fill file descriptors for all queues */
reply.num_fds = 0;
reply_param->rxq_count = 0;
+ if (dev->data->nb_rx_queues + dev->data->nb_tx_queues >
+ RTE_MP_MAX_FD_NUM){
+ TAP_LOG(ERR, "Number of rx/tx queues exceeds max number of fds");
+ return -1;
+ }
for (queue = 0; queue < dev->data->nb_rx_queues; queue++) {
reply.fds[reply.num_fds++] = process_private->rxq_fds[queue];
reply_param->rxq_count++;
}
RTE_ASSERT(reply_param->rxq_count == dev->data->nb_rx_queues);
- RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues);
RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
reply_param->txq_count = 0;
@@ -2164,7 +2172,8 @@ tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
reply.fds[reply.num_fds++] = process_private->txq_fds[queue];
reply_param->txq_count++;
}
-
+ RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues);
+ RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
/* Send reply */
strlcpy(reply.name, request->name, sizeof(reply.name));
strlcpy(reply_param->port_name, request_param->port_name,