[v3,13/13] vdpa/mlx5: disable ROCE
Checks
Commit Message
In order to support virtio queue creation by the FW, ROCE mode
should be disabled in the device.
Do it via Netlink, which is equivalent to the following devlink tool commands:
1. devlink dev param set pci/[pci] name enable_roce value false
cmode driverinit
2. devlink dev reload pci/[pci]
Or via sysfs, which is equivalent to:
echo 0 > /sys/bus/pci/devices/[pci]/roce_enable
The IB device is matched again after ROCE has been disabled.
Signed-off-by: Matan Azrad <matan@mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
drivers/vdpa/mlx5/Makefile | 2 +-
drivers/vdpa/mlx5/meson.build | 2 +-
drivers/vdpa/mlx5/mlx5_vdpa.c | 191 ++++++++++++++++++++++++++++++++++--------
3 files changed, 160 insertions(+), 35 deletions(-)
Comments
Hi Matan,
On 2/2/20 5:03 PM, Matan Azrad wrote:
> In order to support virtio queue creation by the FW, ROCE mode
> should be disabled in the device.
>
> Do it by netlink which is like the devlink tool commands:
> 1. devlink dev param set pci/[pci] name enable_roce value false
> cmode driverinit
> 2. devlink dev reload pci/[pci]
> Or by sysfs which is like:
> echo 0 > /sys/bus/pci/devices/[pci]/roce_enable
>
> The IB device is matched again after ROCE disabling.
>
> Signed-off-by: Matan Azrad <matan@mellanox.com>
> Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> Acked-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
> drivers/vdpa/mlx5/Makefile | 2 +-
> drivers/vdpa/mlx5/meson.build | 2 +-
> drivers/vdpa/mlx5/mlx5_vdpa.c | 191 ++++++++++++++++++++++++++++++++++--------
> 3 files changed, 160 insertions(+), 35 deletions(-)
...
> diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
> index 57619d2..710f305 100644
> --- a/drivers/vdpa/mlx5/mlx5_vdpa.c
> +++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
...
> @@ -246,8 +389,7 @@
> mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
> struct rte_pci_device *pci_dev __rte_unused)
> {
> - struct ibv_device **ibv_list;
> - struct ibv_device *ibv_match = NULL;
> + struct ibv_device *ibv;
> struct mlx5_vdpa_priv *priv = NULL;
> struct ibv_context *ctx = NULL;
> struct mlx5_hca_attr attr;
> @@ -258,42 +400,25 @@
> " driver.");
> return 1;
> }
> - errno = 0;
> - ibv_list = mlx5_glue->get_device_list(&ret);
> - if (!ibv_list) {
> - rte_errno = ENOSYS;
> - DRV_LOG(ERR, "Failed to get device list, is ib_uverbs loaded?");
> + ibv = mlx5_vdpa_get_ib_device_match(&pci_dev->addr);
> + if (!ibv) {
> + DRV_LOG(ERR, "No matching IB device for PCI slot "
> + PCI_PRI_FMT ".", pci_dev->addr.domain,
> + pci_dev->addr.bus, pci_dev->addr.devid,
> + pci_dev->addr.function);
> return -rte_errno;
> - }
> - while (ret-- > 0) {
> - struct rte_pci_addr pci_addr;
> -
> - DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[ret]->name);
> - if (mlx5_dev_to_pci_addr(ibv_list[ret]->ibdev_path, &pci_addr))
> - continue;
> - if (pci_dev->addr.domain != pci_addr.domain ||
> - pci_dev->addr.bus != pci_addr.bus ||
> - pci_dev->addr.devid != pci_addr.devid ||
> - pci_dev->addr.function != pci_addr.function)
> - continue;
> + } else {
> DRV_LOG(INFO, "PCI information matches for device \"%s\".",
> - ibv_list[ret]->name);
> - ibv_match = ibv_list[ret];
> - break;
> + ibv->name);
> }
> - mlx5_glue->free_device_list(ibv_list);
> - if (!ibv_match) {
> - DRV_LOG(ERR, "No matching IB device for PCI slot "
> - "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 ".",
> - pci_dev->addr.domain, pci_dev->addr.bus,
> - pci_dev->addr.devid, pci_dev->addr.function);
> - rte_errno = ENOENT;
> - return -rte_errno;
> + if (mlx5_vdpa_roce_disable(&pci_dev->addr, &ibv) != 0) {
> + DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
> + ibv->name);
> + //return -rte_errno;
> }
Is that commented return expected?
On 2/3/20 10:27 AM, Maxime Coquelin wrote:
> Hi Matan,
>
> On 2/2/20 5:03 PM, Matan Azrad wrote:
>> In order to support virtio queue creation by the FW, ROCE mode
>> should be disabled in the device.
>>
>> Do it by netlink which is like the devlink tool commands:
>> 1. devlink dev param set pci/[pci] name enable_roce value false
>> cmode driverinit
>> 2. devlink dev reload pci/[pci]
>> Or by sysfs which is like:
>> echo 0 > /sys/bus/pci/devices/[pci]/roce_enable
>>
>> The IB device is matched again after ROCE disabling.
>>
>> Signed-off-by: Matan Azrad <matan@mellanox.com>
>> Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
>> Acked-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>> ---
>> drivers/vdpa/mlx5/Makefile | 2 +-
>> drivers/vdpa/mlx5/meson.build | 2 +-
>> drivers/vdpa/mlx5/mlx5_vdpa.c | 191 ++++++++++++++++++++++++++++++++++--------
>> 3 files changed, 160 insertions(+), 35 deletions(-)
> ...
>> diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
>> index 57619d2..710f305 100644
>> --- a/drivers/vdpa/mlx5/mlx5_vdpa.c
>> +++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
>
> ...
>
>> @@ -246,8 +389,7 @@
>> mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
>> struct rte_pci_device *pci_dev __rte_unused)
>> {
>> - struct ibv_device **ibv_list;
>> - struct ibv_device *ibv_match = NULL;
>> + struct ibv_device *ibv;
>> struct mlx5_vdpa_priv *priv = NULL;
>> struct ibv_context *ctx = NULL;
>> struct mlx5_hca_attr attr;
>> @@ -258,42 +400,25 @@
>> " driver.");
>> return 1;
>> }
>> - errno = 0;
>> - ibv_list = mlx5_glue->get_device_list(&ret);
>> - if (!ibv_list) {
>> - rte_errno = ENOSYS;
>> - DRV_LOG(ERR, "Failed to get device list, is ib_uverbs loaded?");
>> + ibv = mlx5_vdpa_get_ib_device_match(&pci_dev->addr);
>> + if (!ibv) {
>> + DRV_LOG(ERR, "No matching IB device for PCI slot "
>> + PCI_PRI_FMT ".", pci_dev->addr.domain,
>> + pci_dev->addr.bus, pci_dev->addr.devid,
>> + pci_dev->addr.function);
>> return -rte_errno;
>> - }
>> - while (ret-- > 0) {
>> - struct rte_pci_addr pci_addr;
>> -
>> - DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[ret]->name);
>> - if (mlx5_dev_to_pci_addr(ibv_list[ret]->ibdev_path, &pci_addr))
>> - continue;
>> - if (pci_dev->addr.domain != pci_addr.domain ||
>> - pci_dev->addr.bus != pci_addr.bus ||
>> - pci_dev->addr.devid != pci_addr.devid ||
>> - pci_dev->addr.function != pci_addr.function)
>> - continue;
>> + } else {
>> DRV_LOG(INFO, "PCI information matches for device \"%s\".",
>> - ibv_list[ret]->name);
>> - ibv_match = ibv_list[ret];
>> - break;
>> + ibv->name);
>> }
>> - mlx5_glue->free_device_list(ibv_list);
>> - if (!ibv_match) {
>> - DRV_LOG(ERR, "No matching IB device for PCI slot "
>> - "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 ".",
>> - pci_dev->addr.domain, pci_dev->addr.bus,
>> - pci_dev->addr.devid, pci_dev->addr.function);
>> - rte_errno = ENOENT;
>> - return -rte_errno;
>> + if (mlx5_vdpa_roce_disable(&pci_dev->addr, &ibv) != 0) {
>> + DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
>> + ibv->name);
>> + //return -rte_errno;
>> }
>
> Is that commented return expected?
>
Please let me know if I should remove the comment, or remove the return.
Thanks,
Maxime
From: Maxime Coquelin
> Sent: Monday, February 3, 2020 1:00 PM
> To: Matan Azrad <matan@mellanox.com>; dev@dpdk.org; Slava Ovsiienko
> <viacheslavo@mellanox.com>
> Subject: Re: [PATCH v3 13/13] vdpa/mlx5: disable ROCE
>
>
>
> On 2/3/20 10:27 AM, Maxime Coquelin wrote:
> > Hi Matan,
> >
> > On 2/2/20 5:03 PM, Matan Azrad wrote:
> >> In order to support virtio queue creation by the FW, ROCE mode should
> >> be disabled in the device.
> >>
> >> Do it by netlink which is like the devlink tool commands:
> >> 1. devlink dev param set pci/[pci] name enable_roce value false
> >> cmode driverinit
> >> 2. devlink dev reload pci/[pci]
> >> Or by sysfs which is like:
> >> echo 0 > /sys/bus/pci/devices/[pci]/roce_enable
> >>
> >> The IB device is matched again after ROCE disabling.
> >>
> >> Signed-off-by: Matan Azrad <matan@mellanox.com>
> >> Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> >> Acked-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> >> ---
> >> drivers/vdpa/mlx5/Makefile | 2 +-
> >> drivers/vdpa/mlx5/meson.build | 2 +-
> >> drivers/vdpa/mlx5/mlx5_vdpa.c | 191
> >> ++++++++++++++++++++++++++++++++++--------
> >> 3 files changed, 160 insertions(+), 35 deletions(-)
> > ...
> >> diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c
> >> b/drivers/vdpa/mlx5/mlx5_vdpa.c index 57619d2..710f305 100644
> >> --- a/drivers/vdpa/mlx5/mlx5_vdpa.c
> >> +++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
> >
> > ...
> >
> >> @@ -246,8 +389,7 @@
> >> mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
> >> struct rte_pci_device *pci_dev __rte_unused) {
> >> - struct ibv_device **ibv_list;
> >> - struct ibv_device *ibv_match = NULL;
> >> + struct ibv_device *ibv;
> >> struct mlx5_vdpa_priv *priv = NULL;
> >> struct ibv_context *ctx = NULL;
> >> struct mlx5_hca_attr attr;
> >> @@ -258,42 +400,25 @@
> >> " driver.");
> >> return 1;
> >> }
> >> - errno = 0;
> >> - ibv_list = mlx5_glue->get_device_list(&ret);
> >> - if (!ibv_list) {
> >> - rte_errno = ENOSYS;
> >> - DRV_LOG(ERR, "Failed to get device list, is ib_uverbs
> loaded?");
> >> + ibv = mlx5_vdpa_get_ib_device_match(&pci_dev->addr);
> >> + if (!ibv) {
> >> + DRV_LOG(ERR, "No matching IB device for PCI slot "
> >> + PCI_PRI_FMT ".", pci_dev->addr.domain,
> >> + pci_dev->addr.bus, pci_dev->addr.devid,
> >> + pci_dev->addr.function);
> >> return -rte_errno;
> >> - }
> >> - while (ret-- > 0) {
> >> - struct rte_pci_addr pci_addr;
> >> -
> >> - DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[ret]-
> >name);
> >> - if (mlx5_dev_to_pci_addr(ibv_list[ret]->ibdev_path,
> &pci_addr))
> >> - continue;
> >> - if (pci_dev->addr.domain != pci_addr.domain ||
> >> - pci_dev->addr.bus != pci_addr.bus ||
> >> - pci_dev->addr.devid != pci_addr.devid ||
> >> - pci_dev->addr.function != pci_addr.function)
> >> - continue;
> >> + } else {
> >> DRV_LOG(INFO, "PCI information matches for device
> \"%s\".",
> >> - ibv_list[ret]->name);
> >> - ibv_match = ibv_list[ret];
> >> - break;
> >> + ibv->name);
> >> }
> >> - mlx5_glue->free_device_list(ibv_list);
> >> - if (!ibv_match) {
> >> - DRV_LOG(ERR, "No matching IB device for PCI slot "
> >> - "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 ".",
> >> - pci_dev->addr.domain, pci_dev->addr.bus,
> >> - pci_dev->addr.devid, pci_dev->addr.function);
> >> - rte_errno = ENOENT;
> >> - return -rte_errno;
> >> + if (mlx5_vdpa_roce_disable(&pci_dev->addr, &ibv) != 0) {
> >> + DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
> >> + ibv->name);
> >> + //return -rte_errno;
> >> }
> >
> > Is that commented return expected?
> >
>
> Please let me know if I should remove the comment, or remove the return.
Sorry, I forgot to remove the comment marker, good catch!
The return should not be commented out; only the "//" needs to be removed.
Can you do it in integration?
>
> Thanks,
> Maxime
On 2/3/20 1:44 PM, Matan Azrad wrote:
>
>
> From: Maxime Coquelin
>> Sent: Monday, February 3, 2020 1:00 PM
>> To: Matan Azrad <matan@mellanox.com>; dev@dpdk.org; Slava Ovsiienko
>> <viacheslavo@mellanox.com>
>> Subject: Re: [PATCH v3 13/13] vdpa/mlx5: disable ROCE
>>
>>
>>
>> On 2/3/20 10:27 AM, Maxime Coquelin wrote:
>>> Hi Matan,
>>>
>>> On 2/2/20 5:03 PM, Matan Azrad wrote:
>>>> In order to support virtio queue creation by the FW, ROCE mode should
>>>> be disabled in the device.
>>>>
>>>> Do it by netlink which is like the devlink tool commands:
>>>> 1. devlink dev param set pci/[pci] name enable_roce value false
>>>> cmode driverinit
>>>> 2. devlink dev reload pci/[pci]
>>>> Or by sysfs which is like:
>>>> echo 0 > /sys/bus/pci/devices/[pci]/roce_enable
>>>>
>>>> The IB device is matched again after ROCE disabling.
>>>>
>>>> Signed-off-by: Matan Azrad <matan@mellanox.com>
>>>> Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
>>>> Acked-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>>> ---
>>>> drivers/vdpa/mlx5/Makefile | 2 +-
>>>> drivers/vdpa/mlx5/meson.build | 2 +-
>>>> drivers/vdpa/mlx5/mlx5_vdpa.c | 191
>>>> ++++++++++++++++++++++++++++++++++--------
>>>> 3 files changed, 160 insertions(+), 35 deletions(-)
>>> ...
>>>> diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c
>>>> b/drivers/vdpa/mlx5/mlx5_vdpa.c index 57619d2..710f305 100644
>>>> --- a/drivers/vdpa/mlx5/mlx5_vdpa.c
>>>> +++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
>>>
>>> ...
>>>
>>>> @@ -246,8 +389,7 @@
>>>> mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
>>>> struct rte_pci_device *pci_dev __rte_unused) {
>>>> - struct ibv_device **ibv_list;
>>>> - struct ibv_device *ibv_match = NULL;
>>>> + struct ibv_device *ibv;
>>>> struct mlx5_vdpa_priv *priv = NULL;
>>>> struct ibv_context *ctx = NULL;
>>>> struct mlx5_hca_attr attr;
>>>> @@ -258,42 +400,25 @@
>>>> " driver.");
>>>> return 1;
>>>> }
>>>> - errno = 0;
>>>> - ibv_list = mlx5_glue->get_device_list(&ret);
>>>> - if (!ibv_list) {
>>>> - rte_errno = ENOSYS;
>>>> - DRV_LOG(ERR, "Failed to get device list, is ib_uverbs
>> loaded?");
>>>> + ibv = mlx5_vdpa_get_ib_device_match(&pci_dev->addr);
>>>> + if (!ibv) {
>>>> + DRV_LOG(ERR, "No matching IB device for PCI slot "
>>>> + PCI_PRI_FMT ".", pci_dev->addr.domain,
>>>> + pci_dev->addr.bus, pci_dev->addr.devid,
>>>> + pci_dev->addr.function);
>>>> return -rte_errno;
>>>> - }
>>>> - while (ret-- > 0) {
>>>> - struct rte_pci_addr pci_addr;
>>>> -
>>>> - DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[ret]-
>>> name);
>>>> - if (mlx5_dev_to_pci_addr(ibv_list[ret]->ibdev_path,
>> &pci_addr))
>>>> - continue;
>>>> - if (pci_dev->addr.domain != pci_addr.domain ||
>>>> - pci_dev->addr.bus != pci_addr.bus ||
>>>> - pci_dev->addr.devid != pci_addr.devid ||
>>>> - pci_dev->addr.function != pci_addr.function)
>>>> - continue;
>>>> + } else {
>>>> DRV_LOG(INFO, "PCI information matches for device
>> \"%s\".",
>>>> - ibv_list[ret]->name);
>>>> - ibv_match = ibv_list[ret];
>>>> - break;
>>>> + ibv->name);
>>>> }
>>>> - mlx5_glue->free_device_list(ibv_list);
>>>> - if (!ibv_match) {
>>>> - DRV_LOG(ERR, "No matching IB device for PCI slot "
>>>> - "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 ".",
>>>> - pci_dev->addr.domain, pci_dev->addr.bus,
>>>> - pci_dev->addr.devid, pci_dev->addr.function);
>>>> - rte_errno = ENOENT;
>>>> - return -rte_errno;
>>>> + if (mlx5_vdpa_roce_disable(&pci_dev->addr, &ibv) != 0) {
>>>> + DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
>>>> + ibv->name);
>>>> + //return -rte_errno;
>>>> }
>>>
>>> Is that commented return expected?
>>>
>>
>> Please let me know if I should remove the comment, or remove the return.
>
> Sorry, forgot the comment , good catch!
> It should not be comment just need to remove "//".
Thanks Matan.
> Can you do it in integration?
Sure, will do now.
Maxime
>>
>> Thanks,
>> Maxime
>
@@ -29,7 +29,7 @@ CFLAGS += -D_XOPEN_SOURCE=600
CFLAGS += $(WERROR_FLAGS)
CFLAGS += -Wno-strict-prototypes
LDLIBS += -lrte_common_mlx5
-LDLIBS += -lrte_eal -lrte_vhost -lrte_kvargs -lrte_bus_pci -lrte_sched
+LDLIBS += -lrte_eal -lrte_vhost -lrte_kvargs -lrte_pci -lrte_bus_pci -lrte_sched
# A few warnings cannot be avoided in external headers.
CFLAGS += -Wno-error=cast-qual
@@ -9,7 +9,7 @@ endif
fmt_name = 'mlx5_vdpa'
allow_experimental_apis = true
-deps += ['hash', 'common_mlx5', 'vhost', 'bus_pci', 'eal', 'sched']
+deps += ['hash', 'common_mlx5', 'vhost', 'pci', 'bus_pci', 'eal', 'sched']
sources = files(
'mlx5_vdpa.c',
'mlx5_vdpa_mem.c',
@@ -1,15 +1,19 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2019 Mellanox Technologies, Ltd
*/
+#include <unistd.h>
+
#include <rte_malloc.h>
#include <rte_log.h>
#include <rte_errno.h>
#include <rte_bus_pci.h>
+#include <rte_pci.h>
#include <mlx5_glue.h>
#include <mlx5_common.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_prm.h>
+#include <mlx5_nl.h>
#include "mlx5_vdpa_utils.h"
#include "mlx5_vdpa.h"
@@ -228,6 +232,145 @@
.get_notify_area = NULL,
};
+/**
+ * Find the IB device whose PCI address equals the given one.
+ *
+ * @param addr
+ *   PCI address to look for.
+ *
+ * @return
+ *   The matching IB device on success, NULL otherwise and rte_errno is set:
+ *   ENOSYS when the device list cannot be obtained, ENOENT when no listed
+ *   device matches.
+ *
+ * NOTE(review): the returned pointer is an element of the list that has
+ * already been passed to free_device_list(); confirm that the verbs library
+ * keeps the device object alive after the list is released.
+ */
+static struct ibv_device *
+mlx5_vdpa_get_ib_device_match(struct rte_pci_addr *addr)
+{
+	int n;
+	struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n);
+	struct ibv_device *ibv_match = NULL;
+
+	if (!ibv_list) {
+		rte_errno = ENOSYS;
+		return NULL;
+	}
+	/* Walk the list from the last entry down to the first one. */
+	while (n-- > 0) {
+		struct rte_pci_addr pci_addr;
+
+		DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name);
+		if (mlx5_dev_to_pci_addr(ibv_list[n]->ibdev_path, &pci_addr))
+			continue;
+		/*
+		 * Compare the address fields only. pci_addr is not
+		 * zero-initialized here, so a raw memcmp() over the whole
+		 * structure could also compare indeterminate padding bytes.
+		 */
+		if (rte_pci_addr_cmp(addr, &pci_addr))
+			continue;
+		ibv_match = ibv_list[n];
+		break;
+	}
+	if (!ibv_match)
+		rte_errno = ENOENT;
+	mlx5_glue->free_device_list(ibv_list);
+	return ibv_match;
+}
+
+/*
+ * Attempt to switch ROCE off through the Netlink devlink interface.
+ *
+ * Returns 0 when ROCE is disabled (or was already disabled). Otherwise
+ * returns the negative value reported by the socket/family-id helpers or
+ * the non-zero result of the failing ROCE get/set helper.
+ */
+static int
+mlx5_vdpa_nl_roce_disable(const char *addr)
+{
+	int roce_on;
+	int family_id;
+	int rc;
+	int nl_sock = mlx5_nl_init(NETLINK_GENERIC);
+
+	if (nl_sock < 0)
+		return nl_sock;
+	family_id = mlx5_nl_devlink_family_id_get(nl_sock);
+	if (family_id < 0) {
+		DRV_LOG(DEBUG, "Failed to get devlink id for ROCE operations by"
+			" Netlink.");
+		rc = family_id;
+		goto out;
+	}
+	/* Read the current state first so an already-disabled device is left untouched. */
+	rc = mlx5_nl_enable_roce_get(nl_sock, family_id, addr, &roce_on);
+	if (rc != 0) {
+		DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.",
+			rc);
+		goto out;
+	}
+	if (roce_on == 0) {
+		DRV_LOG(INFO, "ROCE has already disabled(Netlink).");
+		goto out;
+	}
+	rc = mlx5_nl_enable_roce_set(nl_sock, family_id, addr, 0);
+	if (rc == 0)
+		DRV_LOG(INFO, "ROCE is disabled by Netlink successfully.");
+	else
+		DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", rc);
+out:
+	/* The socket is released on every path that managed to open it. */
+	close(nl_sock);
+	return rc;
+}
+
+/*
+ * Try to disable ROCE by sysfs.
+ *
+ * Reads /sys/bus/pci/devices/<addr>/roce_enable and, when the value is
+ * non-zero, writes "0" to the same file.
+ *
+ * Returns 0 when ROCE is disabled (or was already disabled), a negative
+ * errno value otherwise and rte_errno is set:
+ *   ENOTSUP - the sysfs file cannot be opened for reading or writing,
+ *   EINVAL  - the current value cannot be parsed,
+ *   EIO     - writing the new value failed.
+ */
+static int
+mlx5_vdpa_sys_roce_disable(const char *addr)
+{
+	FILE *file_o;
+	int enable;
+	int ret;
+
+	MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr);
+	file_o = fopen(file_p, "rb");
+	if (!file_o) {
+		rte_errno = ENOTSUP;
+		return -ENOTSUP;
+	}
+	ret = fscanf(file_o, "%d", &enable);
+	fclose(file_o);
+	if (ret != 1) {
+		rte_errno = EINVAL;
+		/* Negative, like every other failure path of this function. */
+		ret = -EINVAL;
+		goto error;
+	}
+	if (!enable) {
+		DRV_LOG(INFO, "ROCE has already disabled(sysfs).");
+		return 0;
+	}
+	file_o = fopen(file_p, "wb");
+	if (!file_o) {
+		rte_errno = ENOTSUP;
+		return -ENOTSUP;
+	}
+	ret = fprintf(file_o, "0\n") < 0 ? -EIO : 0;
+	/*
+	 * The stream is buffered, so the write may only reach the file when
+	 * the stream is flushed by fclose(); its result must be checked too.
+	 */
+	if (fclose(file_o) != 0)
+		ret = -EIO;
+	if (ret) {
+		rte_errno = EIO;
+		goto error;
+	}
+	DRV_LOG(INFO, "ROCE is disabled by sysfs successfully.");
+	return 0;
+error:
+	DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret);
+	return ret;
+}
+
+/* Number of attempts to re-match the IB device after ROCE was disabled. */
+#define MLX5_VDPA_MAX_RETRIES 20
+/* Delay between two consecutive attempts, in microseconds. */
+#define MLX5_VDPA_USEC 1000
+/**
+ * Disable ROCE on the device and re-match its IB device.
+ *
+ * @param addr
+ *   PCI address of the device.
+ * @param[in, out] ibv
+ *   Updated with the IB device matched after ROCE was disabled; left
+ *   untouched on failure.
+ *
+ * @return
+ *   0 on success, -rte_errno otherwise. rte_errno is EAGAIN when ROCE was
+ *   disabled but no IB device matched within the retry budget; when both
+ *   disable attempts fail it is whatever those attempts left in rte_errno.
+ */
+static int
+mlx5_vdpa_roce_disable(struct rte_pci_addr *addr, struct ibv_device **ibv)
+{
+	char addr_name[64] = {0};
+
+	rte_pci_device_name(addr, addr_name, sizeof(addr_name));
+	/* Firstly try to disable ROCE by Netlink and fallback to sysfs. */
+	if (mlx5_vdpa_nl_roce_disable(addr_name) == 0 ||
+	    mlx5_vdpa_sys_roce_disable(addr_name) == 0) {
+		/*
+		 * Succeed to disable ROCE, wait for the IB device to appear
+		 * again after reload.
+		 */
+		int r;
+		struct ibv_device *ibv_new;
+
+		for (r = MLX5_VDPA_MAX_RETRIES; r; r--) {
+			ibv_new = mlx5_vdpa_get_ib_device_match(addr);
+			if (ibv_new) {
+				*ibv = ibv_new;
+				return 0;
+			}
+			usleep(MLX5_VDPA_USEC);
+		}
+		DRV_LOG(ERR, "Cannot match device %s after ROCE disable, "
+			"retries exceed %d", addr_name, MLX5_VDPA_MAX_RETRIES);
+		rte_errno = EAGAIN;
+	}
+	return -rte_errno;
+}
+
/**
* DPDK callback to register a PCI device.
*
@@ -246,8 +389,7 @@
mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
struct rte_pci_device *pci_dev __rte_unused)
{
- struct ibv_device **ibv_list;
- struct ibv_device *ibv_match = NULL;
+ struct ibv_device *ibv;
struct mlx5_vdpa_priv *priv = NULL;
struct ibv_context *ctx = NULL;
struct mlx5_hca_attr attr;
@@ -258,42 +400,25 @@
" driver.");
return 1;
}
- errno = 0;
- ibv_list = mlx5_glue->get_device_list(&ret);
- if (!ibv_list) {
- rte_errno = ENOSYS;
- DRV_LOG(ERR, "Failed to get device list, is ib_uverbs loaded?");
+ ibv = mlx5_vdpa_get_ib_device_match(&pci_dev->addr);
+ if (!ibv) {
+ DRV_LOG(ERR, "No matching IB device for PCI slot "
+ PCI_PRI_FMT ".", pci_dev->addr.domain,
+ pci_dev->addr.bus, pci_dev->addr.devid,
+ pci_dev->addr.function);
return -rte_errno;
- }
- while (ret-- > 0) {
- struct rte_pci_addr pci_addr;
-
- DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[ret]->name);
- if (mlx5_dev_to_pci_addr(ibv_list[ret]->ibdev_path, &pci_addr))
- continue;
- if (pci_dev->addr.domain != pci_addr.domain ||
- pci_dev->addr.bus != pci_addr.bus ||
- pci_dev->addr.devid != pci_addr.devid ||
- pci_dev->addr.function != pci_addr.function)
- continue;
+ } else {
DRV_LOG(INFO, "PCI information matches for device \"%s\".",
- ibv_list[ret]->name);
- ibv_match = ibv_list[ret];
- break;
+ ibv->name);
}
- mlx5_glue->free_device_list(ibv_list);
- if (!ibv_match) {
- DRV_LOG(ERR, "No matching IB device for PCI slot "
- "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 ".",
- pci_dev->addr.domain, pci_dev->addr.bus,
- pci_dev->addr.devid, pci_dev->addr.function);
- rte_errno = ENOENT;
- return -rte_errno;
+ if (mlx5_vdpa_roce_disable(&pci_dev->addr, &ibv) != 0) {
+ DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
+ ibv->name);
+ //return -rte_errno;
}
- ctx = mlx5_glue->dv_open_device(ibv_match);
+ ctx = mlx5_glue->dv_open_device(ibv);
if (!ctx) {
- DRV_LOG(ERR, "Failed to open IB device \"%s\".",
- ibv_match->name);
+ DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name);
rte_errno = ENODEV;
return -rte_errno;
}