[v2,1/2] tools: Add script to create artifacts

Message ID: 20240112224014.30955-2-ahassick@iol.unh.edu
State: New
Series: Add a script to create series artifacts

Commit Message

Adam Hassick Jan. 12, 2024, 10:40 p.m. UTC
  This script takes in a URL to a series on Patchwork and emits a
tarball which may be used for running tests.

Signed-off-by: Adam Hassick <ahassick@iol.unh.edu>
---
 tools/create_series_artifact.py | 472 ++++++++++++++++++++++++++++++++
 1 file changed, 472 insertions(+)
 create mode 100755 tools/create_series_artifact.py
  

Comments

Juraj Linkeš Jan. 15, 2024, 1:07 p.m. UTC | #1
> +    def set_properties(self):

I see that you've left properties as a dictionary. Looking at how it's
used, it makes sense.
I think we can improve this method, though. As it stands, you have to
read the method body to understand what it sets, instead of seeing
that at the call site. It could still be named set_properties, with
keyword arguments being what we're setting:

def set_properties(self, **kwargs):
    for key in kwargs:
        self.properties[key] = kwargs[key]

This way it's obvious what we're setting when calling set_properties:
self.set_properties(tree=self.tree, applied_commit_id=self.commit_id)

I like this more than renaming this to something like
set_tree_and_commit_id, as it's more flexible.
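
A slightly shorter variant with the same effect, in case you prefer it:

def set_properties(self, **kwargs):
    self.properties.update(kwargs)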

<snip>

> +def parse_args() -> CreateSeriesParameters:

This function name is misleading, as it's not just parsing the args
(and returning those), but also processing them and returning a
different object. I suggest either renaming this or splitting the
function into two (one that parses, the other that does the rest).
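
For example, roughly (the split and the build_parameters name are only
illustrative):

def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=HELP,
    )
    # add the same arguments as today
    return parser.parse_args()

def build_parameters(args: argparse.Namespace) -> CreateSeriesParameters:
    # load the config, fetch the series JSON, derive patch_ids, labels,
    # docs_only and so on, then construct the dataclass
    ...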

<snip>

> +def main() -> int:
> +    data = parse_args()

You can see here what I mean: I'd expect parse_args to return args,
not data, so it's a bit confusing and requires reading through
parse_args to understand what you actually get back.
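
With a split like the one sketched above, main() would read more naturally
(again, only a sketch):

def main() -> int:
    args = parse_args()
    data = build_parameters(args)
    tree = ProjectTree(data)
    ...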
  

Patch

diff --git a/tools/create_series_artifact.py b/tools/create_series_artifact.py
new file mode 100755
index 0000000..c1fc636
--- /dev/null
+++ b/tools/create_series_artifact.py
@@ -0,0 +1,472 @@ 
+#!/usr/bin/env python3
+
+import argparse
+from dataclasses import dataclass
+
+import os
+from git_pw import api as pw_api
+import pathlib
+import pygit2
+import requests
+import shutil
+import subprocess
+import tarfile
+from typing import Any, Dict, List, Optional
+import yaml
+
+HELP = """This script will create an artifact given a URL to a Patchwork series.
+Most of the information this script needs is read from a configuration file.
+This configuration file can be found with the other script configs in the config directory of the CI repo.
+
+The configuration file is used to aggregate:
+- Git credentials
+- Patchwork configuration and the user token (user token is optional)
+- The URL of the DPDK Git mirror
+- Locations of dependency scripts and their configuration files
+
+More detail and examples can be found in the default configuration file.
+This default file is located at "config/artifacts.yml" in the dpdk-ci repository.
+
+Example usage:
+
+./create_series_artifact.py ../configs/artifacts.yml https://patches.dpdk.org/api/1.3/series/12345/
+
+"""
+
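+# For reference, the configuration file read by this script is expected to
+# contain roughly the following keys (values are placeholders; see the
+# default file at config/artifacts.yml in the dpdk-ci repository for a
+# real example):
+#
+#   patchwork:
+#     server: <Patchwork server>
+#     project: <Patchwork project name>
+#     token: <optional Patchwork user token>
+#   git:
+#     user: <committer name used when applying the series>
+#     email: <committer email>
+#   repo_url: <URL of the DPDK git mirror to clone>
+#   pw_maintainers_cli:
+#     path: <path to the pw_maintainers_cli script>
+#   patch_parser:
+#     path: <path to the patch parser script>
+#     config: <path to the patch parser configuration file>
+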
+# Map the outputs of pw_maintainers_cli to the names of branches on the
+# GitHub mirror. This is temporary, and should be moved elsewhere in
+# the future.
+BRANCH_NAME_MAP = {
+    "next-baseband": "next-baseband-for-main",
+    "next-crypto": "next-crypto-for-main",
+    "next-eventdev": "next-eventdev-for-main",
+    "next-net": "next-net-for-main",
+    "next-net-intel": "next-net-intel-for-next-net",
+    "next-net-brcm": "next-net-brcm-for-next-net",
+    "next-net-mlx": "next-net-mlx-for-next-net",
+    "next-net-mrvl": "next-net-mrvl-for-main",
+    "next-virtio": "next-virtio-for-next-net",
+}
+
+
+@dataclass
+class CreateSeriesParameters(object):
+    pw_server: str
+    pw_project: str
+    pw_token: str
+    git_user: str
+    git_email: str
+    series_url: str
+    patch_ids: List[int]
+    labels: List[str]
+    config: Dict
+    series: Dict
+    pw_mcli_script: pathlib.Path
+    patch_parser_script: pathlib.Path
+    patch_parser_cfg: pathlib.Path
+    lzma: bool
+    output_tarball: pathlib.Path
+    output_properties: pathlib.Path
+    no_depends: bool
+    docs_only: bool
+
+
+class ProjectTree(object):
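+    """
+    A local checkout of the DPDK mirror, plus the state needed to apply a
+    patch series on it, test-build it, and package the result as an artifact.
+    """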
+    artifact_path: pathlib.Path
+    tree: str
+    commit_id: str
+    path: pathlib.Path
+    log_file_path: pathlib.Path
+    props_file_path: pathlib.Path
+    data: CreateSeriesParameters
+    repo: pygit2.Repository
+    log_buf: List[str]
+    properties: Dict[str, Any]
+
+    def log(self, msg: str):
+        print(msg)
+        self.log_buf.append(msg)
+
+    def write_log(self):
+        with open(self.log_file_path, "w") as log_file:
+            log_file.write("\n".join(self.log_buf))
+
+    def write_properties(self):
+        with open(self.props_file_path, "w") as prop_file:
+            for key, value in self.properties.items():
+                prop_file.write(f"{key}={value}\n")
+
+    def move_logs(self):
+        shutil.move(self.log_file_path, pathlib.Path(os.getcwd(), "log.txt"))
+        shutil.move(
+            self.props_file_path, pathlib.Path(os.getcwd(), self.data.output_properties)
+        )
+
+    def __init__(self, data: CreateSeriesParameters):
+        self.data = data
+        self.path = pathlib.Path(os.curdir, "dpdk").absolute()
+        self.log_buf = []
+        self.log_file_path = pathlib.Path(self.path, "log.txt")
+        self.props_file_path = pathlib.Path(self.path, data.output_properties)
+        self.tree = "main"
+        self.properties = {}
+        self.artifact_path = data.output_tarball
+
+        # Set the range of patch IDs this series (aka patchset) covers.
+        self.properties["patchset_range"] = f"{data.patch_ids[0]}-{data.patch_ids[-1]}"
+
+        # Set the tags using tags obtained by the params class
+        self.properties["tags"] = " ".join(data.labels)
+
+        # Record whether this patch is only documentation
+        self.properties["is_docs_only"] = str(data.docs_only)
+
+        if not self.path.exists():
+            # Find the URL of the mirror to clone from in the config.
+            repo_url = self.data.config["repo_url"]
+
+            # Pull down the git repo we found.
+            for i in range(1, 4):
+                self.log(f"Cloning the DPDK mirror at: {repo_url} (Attempt {i} of 3)")
+                try:
+                    repo = pygit2.clone_repository(repo_url, self.path)
+                    break
+                except pygit2.GitError as e:
+                    self.log(f"Failed! Reason: {e}")
+            else:
+                self.log("Failed to clone from the upstream repository.")
+                exit(1)
+        else:
+            # Fetch any new changes.
+            repo = pygit2.Repository(self.path)
+            origin = repo.remotes["origin"]
+            origin.fetch()
+
+            self.log("Cleaning repository state...")
+            repo.state_cleanup()
+
+        # Initially, check out to main.
+        self.repo = repo
+        self.checkout("main")
+
+        self.log(f"Done: {self.tree} commit {self.commit_id}")
+
+    def checkout(self, branch: str) -> Optional[str]:
+        """
+        Check out the given remote branch.
+        Returns the branch name if successful, None otherwise.
+        """
+
+        git_branch = self.repo.lookup_branch(
+            f"origin/{branch}", pygit2.GIT_BRANCH_REMOTE
+        )
+
+        if not git_branch:
+            self.log(f"Tried to checkout to non-existant branch: {branch}")
+            return None
+
+        self.log(f"Trying to checkout branch: {git_branch.branch_name}")
+        reference = self.repo.resolve_refish(git_branch.branch_name)
+        self.commit_id = str(reference[0].id)
+        self.repo.reset(reference[0].id, pygit2.GIT_RESET_HARD)
+        self.repo.checkout(reference[1])
+        self.tree = branch
+
+        self.log(f"Checked out to {branch} ({self.commit_id})")
+
+        return branch
+
+    def guess_git_tree(self) -> Optional[str]:
+        """
+        Run pw_maintainers_cli to guess the git tree for the patch series we are applying.
+        Returns the name of the checked-out branch, or None if pw_maintainers_cli fails.
+        """
+
+        if "id" not in self.data.series:
+            raise Exception("ID was not found in the series JSON")
+
+        result = subprocess.run(
+            [
+                self.data.pw_mcli_script,
+                "--type",
+                "series",
+                "--pw-server",
+                self.data.pw_server,
+                "--pw-project",
+                self.data.pw_project,
+                "list-trees",
+                str(self.data.series["id"]),
+            ],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            cwd=self.path,
+            env={
+                "MAINTAINERS_FILE_PATH": "MAINTAINERS",
+                "PW_TOKEN": self.data.pw_token,
+            },
+        )
+
+        if result.returncode == 0:
+            branch = result.stdout.decode().strip()
+
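+            # pw_maintainers_cli prints a tree name such as "dpdk" or
+            # "dpdk-next-net"; normalize it and map it to the matching
+            # branch on the mirror via BRANCH_NAME_MAP.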
+            if branch in ["main", "dpdk"]:
+                branch = "main"
+            else:
+                if branch.startswith("dpdk-"):
+                    branch = branch[5:]
+
+                branch = BRANCH_NAME_MAP.get(branch)
+
+            return self.checkout(branch)
+        else:
+            self.log("Failed to guess git tree. Output from pw_maintainers_cli:")
+            self.log(result.stdout.decode())
+            self.log(result.stderr.decode())
+            return None
+
+    def set_properties(self):
+        self.properties["tree"] = self.tree
+        self.properties["applied_commit_id"] = self.commit_id
+
+    def apply_patch_series(self) -> bool:
+        # Run git-pw to apply the series.
+
+        # Configure the tree to point at the given patchwork server and project
+        self.repo.config["pw.server"] = self.data.pw_server
+        self.repo.config["pw.project"] = self.data.pw_project
+        self.repo.config["user.email"] = self.data.git_email
+        self.repo.config["user.name"] = self.data.git_user
+
+        result = subprocess.run(
+            ["git", "pw", "series", "apply", str(self.data.series["id"])],
+            cwd=self.path,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+
+        # Write the log from the apply process to disk.
+        self.log("Applying patch...")
+        self.log(result.stdout.decode())
+        self.log(result.stderr.decode())
+
+        # Store whether there was an error, and return the flag.
+        error = result.returncode != 0
+        self.properties["apply_error"] = error
+        return not error
+
+    def test_build(self) -> bool:
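+        """
+        Run "meson setup" and "ninja" to check that the tree still builds
+        after the series is applied. Records the result in "build_error".
+        """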
+        ninja_result: Optional[subprocess.CompletedProcess] = None
+        meson_result: subprocess.CompletedProcess = subprocess.run(
+            ["meson", "setup", "build"],
+            cwd=self.path,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+
+        build_error = meson_result.returncode != 0
+
+        self.log("Running test build...")
+        self.log(meson_result.stdout.decode())
+
+        if not build_error:
+            ninja_result = subprocess.run(
+                ["ninja", "-C", "build"],
+                cwd=self.path,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+            )
+
+            build_error = build_error or ninja_result.returncode != 0
+            shutil.rmtree(pathlib.Path(self.path, "build"))
+
+            self.log(ninja_result.stdout.decode())
+            self.log(ninja_result.stderr.decode())
+
+        self.log(meson_result.stderr.decode())
+
+        if build_error:
+            self.log("Test build failed.")
+
+        self.properties["build_error"] = build_error
+        return not build_error
+
+    def create_tarball(self):
+        # Copy the logs into the artifact tarball.
+        self.write_log()
+        self.write_properties()
+
+        # Create a tar archive containing the DPDK sources.
+        with tarfile.open(
+            self.artifact_path, mode="w:xz" if self.data.lzma else "w:gz"
+        ) as tar_file:
+            tar_file.add(self.path, "dpdk", recursive=True)
+
+        # Move the log file out of the working directory.
+        self.move_logs()
+
+        return True
+
+
+def get_tags(
+    patch_parser_script: pathlib.Path,
+    patch_parser_cfg: pathlib.Path,
+    series: Dict,
+) -> List[str]:
+    series_filename = f"{series['id']}.patch"
+
+    # Pull down the patch series as a single file.
+    pw_api.download(series["mbox"], None, series_filename)
+
+    # Call the patch parser script to obtain the tags
+    parse_result = subprocess.run(
+        [patch_parser_script, patch_parser_cfg, series_filename],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+
+    # Assert that patch parser succeeded.
+    parse_result.check_returncode()
+
+    # Return the output
+    return parse_result.stdout.decode().splitlines()
+
+
+def parse_args() -> CreateSeriesParameters:
+    """
+    Parses the arguments and returns an instance of a dataclass containing parameters
+    and some derived information.
+    """
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=HELP,
+    )
+    parser.add_argument(
+        "config",
+        type=argparse.FileType(),
+        help="The config file to load. Must be a path to a YAML file.",
+    )
+    parser.add_argument("series_url", type=str, help="The URL to a Patchwork series.")
+    parser.add_argument(
+        "-t",
+        "--pw-token",
+        dest="pw_token",
+        type=str,
+        help="The Patchwork token to use",
+    )
+    parser.add_argument(
+        "-l",
+        "--lzma",
+        action="store_true",
+        help="When set, use LZMA compression rather than GNU zip compression.",
+    )
+    parser.add_argument(
+        "-nd",
+        "--no-depends",
+        action="store_true",
+        help="When set, does not acknowledge the Depends-on label.",
+    )
+
+    args = parser.parse_args()
+
+    # Read the configuration file.
+    with args.config as config_file:
+        config = yaml.safe_load(config_file)
+
+    pw_server = config["patchwork"]["server"]
+    pw_project = config["patchwork"]["project"]
+    pw_token = args.pw_token or config["patchwork"].get("token")
+
+    if not pw_token:
+        print("Failed to obtain the Patchworks token.")
+        exit(1)
+
+    pw_mcli_script = pathlib.Path(config["pw_maintainers_cli"]["path"]).absolute()
+
+    git_user = config["git"]["user"]
+    git_email = config["git"]["email"]
+
+    patch_parser_script = pathlib.Path(config["patch_parser"]["path"]).absolute()
+    patch_parser_cfg = pathlib.Path(config["patch_parser"]["config"]).absolute()
+
+    if args.lzma:
+        tarball_name = "dpdk.tar.xz"
+    else:
+        tarball_name = "dpdk.tar.gz"
+
+    output_tarball = pathlib.Path(tarball_name)
+    output_properties = pathlib.Path(f"{tarball_name}.properties")
+
+    # Pull the series JSON down.
+    resp = requests.get(args.series_url)
+    resp.raise_for_status()
+    series = resp.json()
+
+    # Get the labels using the patch parser.
+    labels = get_tags(patch_parser_script, patch_parser_cfg, series)
+
+    # See if this is a documentation-only patch.
+    docs_only = len(labels) == 1 and labels[0] == "documentation"
+
+    # Get the patch ids in this patch series.
+    patch_ids = list(map(lambda x: int(x["id"]), series["patches"]))
+    patch_ids.sort()
+
+    return CreateSeriesParameters(
+        pw_server=pw_server,
+        pw_project=pw_project,
+        pw_token=pw_token,
+        git_user=git_user,
+        git_email=git_email,
+        series_url=args.series_url,
+        patch_ids=patch_ids,
+        labels=labels,
+        config=config,
+        series=series,
+        pw_mcli_script=pw_mcli_script,
+        patch_parser_script=patch_parser_script,
+        patch_parser_cfg=patch_parser_cfg,
+        lzma=args.lzma,
+        output_tarball=output_tarball,
+        output_properties=output_properties,
+        no_depends=args.no_depends,
+        docs_only=docs_only,
+    )
+
+
+def try_to_apply(tree: ProjectTree) -> bool:
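+    """
+    Record the tree and commit id, apply the series on the currently
+    checked-out tree and, if it builds, package it into the artifact tarball.
+    """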
+    tree.set_properties()
+    return tree.apply_patch_series() and tree.test_build() and tree.create_tarball()
+
+
+def main() -> int:
+    data = parse_args()
+
+    # Pull down the DPDK mirror.
+    tree = ProjectTree(data)
+
+    # Try to guess the Git tree for this patchset.
+    guessed_tree = tree.guess_git_tree()
+
+    if not guessed_tree:
+        print("Failed to guess git tree.")
+        return 1
+
+    # Try to apply this patch.
+    if not (
+        try_to_apply(tree)  # First, try to apply on the guessed tree.
+        or guessed_tree != "main"  # If that fails, and the guessed tree was not main
+        and tree.checkout("main")  # Checkout to main, then
+        and try_to_apply(tree)  # Try to apply on main
+    ):
+        tree.write_log()
+        tree.write_properties()
+        tree.move_logs()
+
+        print("FAILURE")
+
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    exit(main())