[v2,3/4] usertools: parse strings from PE images

Message ID 1744680096-17683-4-git-send-email-andremue@linux.microsoft.com (mailing list archive)
State Superseded
Delegated to: David Marchand
Headers
Series: allow pmdinfo to be inserted and parsed using MSVC

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Andre Muezerie April 15, 2025, 1:21 a.m. UTC
Enhance the usertools/dpdk-pmdinfo.py script so that it can also parse
strings from sections in PE images.

Signed-off-by: Andre Muezerie <andremue@linux.microsoft.com>
---
 usertools/dpdk-pmdinfo.py | 55 ++++++++++++++++++++++++++++++++-------
 1 file changed, 45 insertions(+), 10 deletions(-)
  

Comments

Robin Jarry April 15, 2025, 11:19 a.m. UTC | #1
Hi Andre,

Thanks for the respin. Could you format the code using ruff/black before
sending a v3?

Andre Muezerie, Apr 15, 2025 at 03:21:
> Script usertools\dpdk-pmdinfo.py was enhanced to also be able to parse
> symbols from sections in PE images.
>
> Signed-off-by: Andre Muezerie <andremue@linux.microsoft.com>
> ---
>  usertools/dpdk-pmdinfo.py | 55 ++++++++++++++++++++++++++++++++-------
>  1 file changed, 45 insertions(+), 10 deletions(-)
>
> diff --git a/usertools/dpdk-pmdinfo.py b/usertools/dpdk-pmdinfo.py
> index 9189a2fdbc..9e1ae5b534 100755
> --- a/usertools/dpdk-pmdinfo.py
> +++ b/usertools/dpdk-pmdinfo.py
> @@ -37,8 +37,14 @@
>  from pathlib import Path
>  from typing import Iterable, Iterator, List, Union
>  
> -import elftools
> -from elftools.elf.elffile import ELFError, ELFFile
> +def is_windows():
> +    return os.name == 'nt'

Defining a function before imports is awkward and probably overkill.
Can you replace all is_windows() calls with os.name == "nt"?

> +
> +if is_windows():

if os.name == "nt":

> +    import pefile
> +else:
> +    import elftools
> +    from elftools.elf.elffile import ELFError, ELFFile
>  
>  
>  # ----------------------------------------------------------------------------
> @@ -114,18 +120,23 @@ def parse_pmdinfo(paths: Iterable[Path], search_plugins: bool) -> List[dict]:
>          A list of DPDK drivers info dictionaries.
>      """
>      binaries = set(paths)
> -    for p in paths:
> -        binaries.update(get_needed_libs(p))
> -    if search_plugins:
> -        # cast to list to avoid errors with update while iterating
> -        binaries.update(list(get_plugin_libs(binaries)))
> +
> +    if is_windows():

if os.name == "nt":

> +        section_name = ".rdata"
> +    else:
> +        section_name = ".rodata"
> +        for p in paths:
> +            binaries.update(get_needed_libs(p))
> +        if search_plugins:
> +            # cast to list to avoid errors with update while iterating
> +            binaries.update(list(get_plugin_libs(binaries)))
>  
>      drivers = []
>  
>      for b in binaries:
>          logging.debug("analyzing %s", b)
>          try:
> -            for s in get_elf_strings(b, ".rodata", "PMD_INFO_STRING="):
> +            for s in get_section_strings(b, section_name, "PMD_INFO_STRING="):
>                  try:
>                      info = json.loads(s)
>                      scrub_pci_ids(info)
> @@ -232,8 +243,8 @@ def elftools_version():
>          return (0, 24)
>      return (int(match[1]), int(match[2]))
>  
> -
> -ELFTOOLS_VERSION = elftools_version()
> +if not is_windows():

if os.name != "nt":

> +    ELFTOOLS_VERSION = elftools_version()
>  
>  
>  def from_elftools(s: Union[bytes, str]) -> str:
> @@ -269,6 +280,30 @@ def get_elf_strings(path: Path, section: str, prefix: str) -> Iterator[str]:
>          yield from find_strings(sec.data(), prefix)
>  
>  
> +# ----------------------------------------------------------------------------
> +def get_pe_strings(path: Path, section: str, prefix: str) -> Iterator[str]:
> +    """
> +    Extract strings from a named PE section in a file.
> +    """
> +    pe = pefile.PE(path)
> +    for sec in pe.sections:
> +        section_name = sec.Name.decode().strip('\x00')
> +        if section_name != section:
> +            continue
> +        yield from find_strings(sec.get_data(), prefix)
> +
> +
> +# ----------------------------------------------------------------------------
> +def get_section_strings(path: Path, section: str, prefix: str) -> Iterator[str]:
> +    """
> +    Extract strings from a named section in an ELF or PE file.
> +    """
> +    if is_windows():

if os.name == "nt":

> +        yield from get_pe_strings(path, section, prefix)
> +    else:
> +        yield from get_elf_strings(path, section, prefix)
> +
> +
>  # ----------------------------------------------------------------------------
>  LDD_LIB_RE = re.compile(
>      r"""
  

Patch

diff --git a/usertools/dpdk-pmdinfo.py b/usertools/dpdk-pmdinfo.py
index 9189a2fdbc..9e1ae5b534 100755
--- a/usertools/dpdk-pmdinfo.py
+++ b/usertools/dpdk-pmdinfo.py
@@ -37,8 +37,14 @@ 
 from pathlib import Path
 from typing import Iterable, Iterator, List, Union
 
-import elftools
-from elftools.elf.elffile import ELFError, ELFFile
+def is_windows():
+    return os.name == 'nt'
+
+if is_windows():
+    import pefile
+else:
+    import elftools
+    from elftools.elf.elffile import ELFError, ELFFile
 
 
 # ----------------------------------------------------------------------------
@@ -114,18 +120,23 @@  def parse_pmdinfo(paths: Iterable[Path], search_plugins: bool) -> List[dict]:
         A list of DPDK drivers info dictionaries.
     """
     binaries = set(paths)
-    for p in paths:
-        binaries.update(get_needed_libs(p))
-    if search_plugins:
-        # cast to list to avoid errors with update while iterating
-        binaries.update(list(get_plugin_libs(binaries)))
+
+    if is_windows():
+        section_name = ".rdata"
+    else:
+        section_name = ".rodata"
+        for p in paths:
+            binaries.update(get_needed_libs(p))
+        if search_plugins:
+            # cast to list to avoid errors with update while iterating
+            binaries.update(list(get_plugin_libs(binaries)))
 
     drivers = []
 
     for b in binaries:
         logging.debug("analyzing %s", b)
         try:
-            for s in get_elf_strings(b, ".rodata", "PMD_INFO_STRING="):
+            for s in get_section_strings(b, section_name, "PMD_INFO_STRING="):
                 try:
                     info = json.loads(s)
                     scrub_pci_ids(info)
@@ -232,8 +243,8 @@  def elftools_version():
         return (0, 24)
     return (int(match[1]), int(match[2]))
 
-
-ELFTOOLS_VERSION = elftools_version()
+if not is_windows():
+    ELFTOOLS_VERSION = elftools_version()
 
 
 def from_elftools(s: Union[bytes, str]) -> str:
@@ -269,6 +280,30 @@  def get_elf_strings(path: Path, section: str, prefix: str) -> Iterator[str]:
         yield from find_strings(sec.data(), prefix)
 
 
+# ----------------------------------------------------------------------------
+def get_pe_strings(path: Path, section: str, prefix: str) -> Iterator[str]:
+    """
+    Extract strings from a named PE section in a file.
+    """
+    pe = pefile.PE(path)
+    for sec in pe.sections:
+        section_name = sec.Name.decode().strip('\x00')
+        if section_name != section:
+            continue
+        yield from find_strings(sec.get_data(), prefix)
+
+
+# ----------------------------------------------------------------------------
+def get_section_strings(path: Path, section: str, prefix: str) -> Iterator[str]:
+    """
+    Extract strings from a named section in an ELF or PE file.
+    """
+    if is_windows():
+        yield from get_pe_strings(path, section, prefix)
+    else:
+        yield from get_elf_strings(path, section, prefix)
+
+
 # ----------------------------------------------------------------------------
 LDD_LIB_RE = re.compile(
     r"""