[v2] pci: read amd iommu virtual address width
Checks
Commit Message
Add code to read the virtual address width for AMD processors.
Signed-off-by: Michael Piszczek <mpiszczek@ddn.com>
---
drivers/bus/pci/linux/pci.c | 43 +++++++++++++++++++++++++++++++++++++
1 file changed, 43 insertions(+)
Comments
On Thu, Sep 15, 2022 at 8:40 AM Michael Piszczek <mpiszczek@ddn.com> wrote:
>
> Add code to read the virtual address width for AMD processors.
>
> Signed-off-by: Michael Piszczek <mpiszczek@ddn.com>
> ---
> drivers/bus/pci/linux/pci.c | 43 +++++++++++++++++++++++++++++++++++++
> 1 file changed, 43 insertions(+)
>
> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
> index e521459870..0c6d79ca74 100644
> --- a/drivers/bus/pci/linux/pci.c
> +++ b/drivers/bus/pci/linux/pci.c
> @@ -4,6 +4,7 @@
>
> #include <string.h>
> #include <dirent.h>
> +#include <sys/stat.h>
>
> #include <rte_log.h>
> #include <rte_bus.h>
> @@ -492,6 +493,38 @@ rte_pci_scan(void)
> }
>
> #if defined(RTE_ARCH_X86)
> +
> +static uint64_t
> +pci_device_amd_iommu_support_va(const struct rte_pci_addr *addr)
> +{
> +#define RD_AMD_CAP_VASIZE_SHIFT 15
> +#define RD_AMD_CAP_VASIZE_MASK (0x7F << RD_AMD_CAP_VASIZE_SHIFT)
> +
> + char filename[PATH_MAX];
> + FILE *fp;
> + uint64_t amd_cap_reg = 0;
> +
> + snprintf(filename, sizeof(filename),
> + "%s/" PCI_PRI_FMT "/iommu/amd-iommu/cap",
> + rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
> + addr->function);
> +
> + fp = fopen(filename, "r");
> + if (fp == NULL)
> + return 0;
> +
> + /* We have an Amd IOMMU */
> + if (fscanf(fp, "%" PRIx64, &amd_cap_reg) != 1) {
> + RTE_LOG(ERR, EAL, "%s(): can't read %s\n", __func__, filename);
> + fclose(fp);
> + return 0;
> + }
> +
> + fclose(fp);
> +
> + return ((amd_cap_reg & RD_AMD_CAP_VASIZE_MASK) >> RD_AMD_CAP_VASIZE_SHIFT) + 1;
> +}
> +
> bool
> pci_device_iommu_support_va(const struct rte_pci_device *dev)
> {
> @@ -501,6 +534,16 @@ pci_device_iommu_support_va(const struct rte_pci_device *dev)
> char filename[PATH_MAX];
> FILE *fp;
> uint64_t mgaw, vtd_cap_reg = 0;
> + struct stat s;
> +
> + if (stat("/sys/class/iommu/ivhd2/amd-iommu", &s) == 0) {
> + mgaw = pci_device_amd_iommu_support_va(addr);
> + if (mgaw > 0) {
> + rte_mem_set_dma_mask(mgaw);
> + return true;
> + }
> + return false;
> + }
>
> snprintf(filename, sizeof(filename),
> "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap",
> --
> 2.34.1
>
Sorry, I picked you guys with @amd.com mail addresses.
If there is someone with knowledge of this piece of AMD hw available,
can this patch be reviewed?
Thanks.
[Public]
Hi Michael,
Thank you for looking into the change, please find my comments shared below
<snipped>
> >
> > Add code to read the virtual address width for AMD processors.
> >
> > Signed-off-by: Michael Piszczek <mpiszczek@ddn.com>
> > ---
> > drivers/bus/pci/linux/pci.c | 43
> > +++++++++++++++++++++++++++++++++++++
> > 1 file changed, 43 insertions(+)
> >
> > diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
> > index e521459870..0c6d79ca74 100644
> > --- a/drivers/bus/pci/linux/pci.c
> > +++ b/drivers/bus/pci/linux/pci.c
> > @@ -4,6 +4,7 @@
> >
> > #include <string.h>
> > #include <dirent.h>
> > +#include <sys/stat.h>
> >
> > #include <rte_log.h>
> > #include <rte_bus.h>
> > @@ -492,6 +493,38 @@ rte_pci_scan(void) }
> >
> > #if defined(RTE_ARCH_X86)
> > +
> > +static uint64_t
> > +pci_device_amd_iommu_support_va(const struct rte_pci_addr *addr) {
> > +#define RD_AMD_CAP_VASIZE_SHIFT 15 #define
> RD_AMD_CAP_VASIZE_MASK
> > +(0x7F << RD_AMD_CAP_VASIZE_SHIFT)
> > +
> > + char filename[PATH_MAX];
> > + FILE *fp;
> > + uint64_t amd_cap_reg = 0;
> > +
> > + snprintf(filename, sizeof(filename),
> > + "%s/" PCI_PRI_FMT "/iommu/amd-iommu/cap",
> > + rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
> > + addr->function);
> > +
> > + fp = fopen(filename, "r");
> > + if (fp == NULL)
> > + return 0;
> > +
> > + /* We have an Amd IOMMU */
> > + if (fscanf(fp, "%" PRIx64, &amd_cap_reg) != 1) {
> > + RTE_LOG(ERR, EAL, "%s(): can't read %s\n", __func__, filename);
> > + fclose(fp);
> > + return 0;
> > + }
> > +
> > + fclose(fp);
> > +
> > + return ((amd_cap_reg & RD_AMD_CAP_VASIZE_MASK) >>
> > +RD_AMD_CAP_VASIZE_SHIFT) + 1; }
> > +
> > bool
> > pci_device_iommu_support_va(const struct rte_pci_device *dev) { @@
> > -501,6 +534,16 @@ pci_device_iommu_support_va(const struct
> rte_pci_device *dev)
> > char filename[PATH_MAX];
> > FILE *fp;
> > uint64_t mgaw, vtd_cap_reg = 0;
> > + struct stat s;
> > +
> > + if (stat("/sys/class/iommu/ivhd2/amd-iommu", &s) == 0) {
AMD EPYC can be configured into various Logical NUMA divisions for both 1P and 2P (sockets) for Naples, Rome and Milan. This leads various combinations as 1, 2, 4 for a single socket. Hence hard coding the check for ` ivhd2` could fail even for a valid NPS configuration.
> > + mgaw = pci_device_amd_iommu_support_va(addr);
Instead of checking the device availability, one can exercise the iommu capability directly. For example
```
# ls /sys/bus/pci/devices/0000\:41\:00.0/iommu/amd-iommu/cap -l
-r--r--r-- 1 root root 4096 Oct 10 06:10 /sys/bus/pci/devices/0000:41:00.0/iommu/amd-iommu/cap
```
> > + if (mgaw > 0) {
> > + rte_mem_set_dma_mask(mgaw);
> > + return true;
> > + }
> > + return false;
> > + }
> >
> > snprintf(filename, sizeof(filename),
> > "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap",
Hence my suggestion would be retaining the same code flow as original function with added check for `amd-iommu` along with `intel-iommu` for x86 devices. The additional function call ` pci_device_amd_iommu_support_va` can be avoided.
> > --
> > 2.34.1
> >
>
> Sorry, I picked you guys with @amd.com mail addresses.
> If there is someone with knowledge of this piece of AMD hw available, can
> this patch be reviewed?
> Thanks.
>
>
> --
> David Marchand
@@ -4,6 +4,7 @@
#include <string.h>
#include <dirent.h>
+#include <sys/stat.h>
#include <rte_log.h>
#include <rte_bus.h>
@@ -492,6 +493,38 @@ rte_pci_scan(void)
}
#if defined(RTE_ARCH_X86)
+
+static uint64_t
+pci_device_amd_iommu_support_va(const struct rte_pci_addr *addr)
+{
+#define RD_AMD_CAP_VASIZE_SHIFT 15
+#define RD_AMD_CAP_VASIZE_MASK (0x7F << RD_AMD_CAP_VASIZE_SHIFT)
+
+ char filename[PATH_MAX];
+ FILE *fp;
+ uint64_t amd_cap_reg = 0;
+
+ snprintf(filename, sizeof(filename),
+ "%s/" PCI_PRI_FMT "/iommu/amd-iommu/cap",
+ rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
+ addr->function);
+
+ fp = fopen(filename, "r");
+ if (fp == NULL)
+ return 0;
+
+ /* We have an Amd IOMMU */
+ if (fscanf(fp, "%" PRIx64, &amd_cap_reg) != 1) {
+ RTE_LOG(ERR, EAL, "%s(): can't read %s\n", __func__, filename);
+ fclose(fp);
+ return 0;
+ }
+
+ fclose(fp);
+
+ return ((amd_cap_reg & RD_AMD_CAP_VASIZE_MASK) >> RD_AMD_CAP_VASIZE_SHIFT) + 1;
+}
+
bool
pci_device_iommu_support_va(const struct rte_pci_device *dev)
{
@@ -501,6 +534,16 @@ pci_device_iommu_support_va(const struct rte_pci_device *dev)
char filename[PATH_MAX];
FILE *fp;
uint64_t mgaw, vtd_cap_reg = 0;
+ struct stat s;
+
+ if (stat("/sys/class/iommu/ivhd2/amd-iommu", &s) == 0) {
+ mgaw = pci_device_amd_iommu_support_va(addr);
+ if (mgaw > 0) {
+ rte_mem_set_dma_mask(mgaw);
+ return true;
+ }
+ return false;
+ }
snprintf(filename, sizeof(filename),
"%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap",