From 19468118157b1004ebd32dfc898cf9ab1ab6ef22 Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Mon, 19 Jan 2026 11:08:02 -0800 Subject: [PATCH 01/17] Add run_outputs module for CI output path computation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces RunOutputRoot dataclass as the single source of truth for computing paths to all outputs from a GitHub Actions workflow run. This consolidates duplicated path logic scattered across multiple files. The module documents the complete layout structure for: - Build artifacts (.tar.xz, .tar.zst) - Logs and log indexes - Manifests (therock_manifest.json) - Future: Python packages, native packages, reports Changes: - Add build_tools/_therock_utils/run_outputs.py with RunOutputRoot class - Factory methods: from_workflow_run() and for_local() - Path computation for all output types (artifacts, logs, manifests, etc.) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- build_tools/_therock_utils/run_outputs.py | 351 ++++++++++++++++++++++ 1 file changed, 351 insertions(+) create mode 100644 build_tools/_therock_utils/run_outputs.py diff --git a/build_tools/_therock_utils/run_outputs.py b/build_tools/_therock_utils/run_outputs.py new file mode 100644 index 00000000000..04a700e5255 --- /dev/null +++ b/build_tools/_therock_utils/run_outputs.py @@ -0,0 +1,351 @@ +"""TheRock CI run outputs layout specification. + +This module defines the canonical directory structure for all outputs from a +GitHub Actions workflow run. All tools that read or write run outputs should +use this module to compute paths. + +A "run output" is anything produced by a CI workflow run: +- Build artifacts (.tar.xz, .tar.zst archives) +- Logs (.log files, ninja_logs.tar.gz) +- Manifests (therock_manifest.json) +- Reports (build_time_analysis.html, test reports) +- Python packages (.whl, .tar.gz sdists) [future] +- Native packages (.deb, .rpm) [future] + +Layout Structure +---------------- +There is a 1:1 mapping between a GitHub Actions workflow run ID and a run +outputs directory. The structure is: + + {root}/ + ├── {name}_{component}_{family}.tar.xz # Build artifacts (at root) + ├── {name}_{component}_{family}.tar.xz.sha256sum + ├── index-{artifact_group}.html # Per-group artifact index + ├── logs/{artifact_group}/ + │ ├── *.log # Build logs + │ ├── ninja_logs.tar.gz # Ninja timing logs + │ ├── index.html # Log index + │ └── build_time_analysis.html # Build timing (Linux) + ├── manifests/{artifact_group}/ + │ └── therock_manifest.json # Build manifest + ├── python/{artifact_group}/ # Python packages [future] + │ ├── *.whl + │ └── *.tar.gz + └── packages/{artifact_group}/ # Native packages [future] + ├── *.deb + └── *.rpm + +Where: +- {root} = {external_repo}{run_id}-{platform} +- {external_repo} = "" for ROCm/TheRock, or "{owner}-{repo}/" for forks/external +- {run_id} = GitHub Actions workflow run ID +- {platform} = "linux" or "windows" +- {name} = artifact name (e.g., "blas", "core-hip", "amd-llvm") +- {component} = dev|lib|run|test|dbg|doc +- {family} = "generic" or GPU family (gfx94X, gfx1100, etc.) +- {artifact_group} = build variant (e.g., "gfx94X-dcgpu", "gfx110X-all") + +Multi-Platform / Multi-Group Organization +----------------------------------------- +A single workflow run may produce outputs for multiple platforms and artifact +groups. Each platform gets its own root directory: + + s3://therock-ci-artifacts/ + ├── 12345678901-linux/ + │ ├── *.tar.xz (artifacts for all Linux artifact groups) + │ ├── index-gfx94X-dcgpu.html + │ ├── index-gfx110X-all.html + │ ├── logs/gfx94X-dcgpu/ + │ ├── logs/gfx110X-all/ + │ └── ... + └── 12345678901-windows/ + ├── *.tar.xz (artifacts for all Windows artifact groups) + ├── index-gfx110X-dgpu.html + └── ... + +Multiple CI jobs (different GPU families, build variants) upload to the same +run directory, differentiated by artifact_group in subdirectory names and +index filenames. + +Usage +----- + from _therock_utils.run_outputs import RunOutputRoot + + # From CI environment + root = RunOutputRoot.from_workflow_run( + run_id="12345678901", + platform="linux", + ) + + # Get paths for various outputs + print(root.s3_uri) # s3://bucket/12345678901-linux + print(root.artifact_index_url("gfx94X-dcgpu")) # https://...index-gfx94X-dcgpu.html + print(root.logs_s3_uri("gfx94X-dcgpu")) # s3://bucket/.../logs/gfx94X-dcgpu + + # For local development/testing + root = RunOutputRoot.for_local(run_id="local", platform="linux") + local_dir = root.local_path(Path("/tmp/staging")) +""" + +from dataclasses import dataclass +from pathlib import Path +import platform as platform_module + + +@dataclass(frozen=True) +class RunOutputRoot: + """Root location for a workflow run's outputs. + + This is the single source of truth for computing paths to all outputs + from a CI workflow run. Use this class instead of manually constructing + S3 keys or URLs. + + The class is immutable (frozen) to ensure path computation is deterministic. + """ + + bucket: str + """S3 bucket name (e.g., 'therock-ci-artifacts').""" + + external_repo: str + """Repository prefix for namespacing (e.g., 'ROCm-TheRock/' or '').""" + + run_id: str + """GitHub Actions workflow run ID (e.g., '12345678901').""" + + platform: str + """Platform name ('linux' or 'windows').""" + + # ------------------------------------------------------------------------- + # Root paths + # ------------------------------------------------------------------------- + + @property + def prefix(self) -> str: + """S3 key prefix for this run (no leading/trailing slash). + + This is the relative path from the bucket root to the run directory. + """ + return f"{self.external_repo}{self.run_id}-{self.platform}" + + @property + def s3_uri(self) -> str: + """S3 URI for the run root (e.g., 's3://bucket/12345-linux').""" + return f"s3://{self.bucket}/{self.prefix}" + + @property + def https_url(self) -> str: + """Public HTTPS URL for the run root.""" + return f"https://{self.bucket}.s3.amazonaws.com/{self.prefix}" + + def local_path(self, staging_dir: Path) -> Path: + """Local directory path mirroring S3 structure. + + Args: + staging_dir: Base directory for local staging. + + Returns: + Path like {staging_dir}/{external_repo}{run_id}-{platform}/ + """ + return staging_dir / self.prefix + + # ------------------------------------------------------------------------- + # Build artifacts (.tar.xz, .tar.zst) + # + # Artifacts are stored at the root of the run directory. + # This is the current layout; if we migrate to artifacts/ subdirectory, + # only these methods need to change. + # ------------------------------------------------------------------------- + + def artifact_s3_key(self, filename: str) -> str: + """S3 key for a build artifact file. + + Args: + filename: Artifact filename (e.g., 'blas_lib_gfx94X.tar.xz') + """ + return f"{self.prefix}/{filename}" + + def artifact_s3_uri(self, filename: str) -> str: + """S3 URI for a build artifact file.""" + return f"{self.s3_uri}/{filename}" + + def artifact_https_url(self, filename: str) -> str: + """Public HTTPS URL for a build artifact file.""" + return f"{self.https_url}/{filename}" + + def artifact_index_s3_key(self, artifact_group: str) -> str: + """S3 key for the artifact index HTML file.""" + return f"{self.prefix}/index-{artifact_group}.html" + + def artifact_index_url(self, artifact_group: str) -> str: + """Public URL for the artifact index HTML file.""" + return f"{self.https_url}/index-{artifact_group}.html" + + # ------------------------------------------------------------------------- + # Logs + # ------------------------------------------------------------------------- + + def logs_prefix(self, artifact_group: str) -> str: + """S3 key prefix for logs directory (no trailing slash).""" + return f"{self.prefix}/logs/{artifact_group}" + + def logs_s3_uri(self, artifact_group: str) -> str: + """S3 URI for logs directory.""" + return f"s3://{self.bucket}/{self.logs_prefix(artifact_group)}" + + def logs_https_url(self, artifact_group: str) -> str: + """Public HTTPS URL for logs directory.""" + return f"{self.https_url}/logs/{artifact_group}" + + def log_file_s3_key(self, artifact_group: str, filename: str) -> str: + """S3 key for a specific log file.""" + return f"{self.logs_prefix(artifact_group)}/{filename}" + + def log_index_url(self, artifact_group: str) -> str: + """Public URL for the log index HTML file.""" + return f"{self.logs_https_url(artifact_group)}/index.html" + + def build_time_analysis_url(self, artifact_group: str) -> str: + """Public URL for build time analysis HTML (Linux only).""" + return f"{self.logs_https_url(artifact_group)}/build_time_analysis.html" + + # ------------------------------------------------------------------------- + # Manifests + # ------------------------------------------------------------------------- + + def manifests_prefix(self, artifact_group: str) -> str: + """S3 key prefix for manifests directory (no trailing slash).""" + return f"{self.prefix}/manifests/{artifact_group}" + + def manifest_s3_key(self, artifact_group: str) -> str: + """S3 key for therock_manifest.json.""" + return f"{self.manifests_prefix(artifact_group)}/therock_manifest.json" + + def manifest_s3_uri(self, artifact_group: str) -> str: + """S3 URI for therock_manifest.json.""" + return f"s3://{self.bucket}/{self.manifest_s3_key(artifact_group)}" + + def manifest_url(self, artifact_group: str) -> str: + """Public URL for therock_manifest.json.""" + return f"{self.https_url}/manifests/{artifact_group}/therock_manifest.json" + + # ------------------------------------------------------------------------- + # Python packages (.whl, .tar.gz) [future] + # ------------------------------------------------------------------------- + + def python_prefix(self, artifact_group: str) -> str: + """S3 key prefix for Python packages directory.""" + return f"{self.prefix}/python/{artifact_group}" + + def python_s3_uri(self, artifact_group: str) -> str: + """S3 URI for Python packages directory.""" + return f"s3://{self.bucket}/{self.python_prefix(artifact_group)}" + + def python_package_s3_key(self, artifact_group: str, filename: str) -> str: + """S3 key for a Python package file (.whl or .tar.gz).""" + return f"{self.python_prefix(artifact_group)}/{filename}" + + # ------------------------------------------------------------------------- + # Native packages (.deb, .rpm) [future] + # ------------------------------------------------------------------------- + + def packages_prefix(self, artifact_group: str) -> str: + """S3 key prefix for native packages directory.""" + return f"{self.prefix}/packages/{artifact_group}" + + def packages_s3_uri(self, artifact_group: str) -> str: + """S3 URI for native packages directory.""" + return f"s3://{self.bucket}/{self.packages_prefix(artifact_group)}" + + def native_package_s3_key(self, artifact_group: str, filename: str) -> str: + """S3 key for a native package file (.deb, .rpm).""" + return f"{self.packages_prefix(artifact_group)}/{filename}" + + # ------------------------------------------------------------------------- + # Reports (.html) [future] + # ------------------------------------------------------------------------- + + def reports_prefix(self, artifact_group: str) -> str: + """S3 key prefix for reports directory.""" + return f"{self.prefix}/reports/{artifact_group}" + + def reports_s3_uri(self, artifact_group: str) -> str: + """S3 URI for reports directory.""" + return f"s3://{self.bucket}/{self.reports_prefix(artifact_group)}" + + def report_s3_key(self, artifact_group: str, filename: str) -> str: + """S3 key for a report file (.html).""" + return f"{self.reports_prefix(artifact_group)}/{filename}" + + def report_url(self, artifact_group: str, filename: str) -> str: + """Public URL for a report file.""" + return f"{self.https_url}/reports/{artifact_group}/{filename}" + + # ------------------------------------------------------------------------- + # Factory methods + # ------------------------------------------------------------------------- + + @classmethod + def from_workflow_run( + cls, + run_id: str, + platform: str, + github_repository: str | None = None, + workflow_run: dict | None = None, + ) -> "RunOutputRoot": + """Create from workflow context. + + Uses retrieve_bucket_info() to determine the appropriate S3 bucket + and external_repo prefix based on the repository and workflow run. + + Args: + run_id: GitHub Actions workflow run ID. + platform: Platform name ('linux' or 'windows'). + github_repository: Repository in 'owner/repo' format. If None, + uses GITHUB_REPOSITORY env var or defaults to 'ROCm/TheRock'. + workflow_run: Optional workflow run dict from GitHub API. If + provided, avoids an API call in retrieve_bucket_info(). + + Returns: + RunOutputRoot configured for the workflow run. + """ + from ..github_actions.github_actions_utils import retrieve_bucket_info + + external_repo, bucket = retrieve_bucket_info( + github_repository=github_repository, + workflow_run=workflow_run, + ) + return cls( + bucket=bucket, + external_repo=external_repo, + run_id=run_id, + platform=platform, + ) + + @classmethod + def for_local( + cls, + run_id: str = "local", + platform: str | None = None, + bucket: str = "local", + ) -> "RunOutputRoot": + """Create for local development/testing. + + Creates a RunOutputRoot with placeholder values suitable for local + staging directories. The bucket name is set to 'local' by default. + + Args: + run_id: Run identifier (default: 'local'). + platform: Platform name. If None, detects from current system. + bucket: Bucket name placeholder (default: 'local'). + + Returns: + RunOutputRoot configured for local use. + """ + if platform is None: + platform = platform_module.system().lower() + return cls( + bucket=bucket, + external_repo="", + run_id=run_id, + platform=platform, + ) From c6a87e522c85f6f761abfe762117c52215384d87 Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Mon, 19 Jan 2026 11:23:55 -0800 Subject: [PATCH 02/17] Migrate post_build_upload.py to use RunOutputRoot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces manual S3 path construction with the new RunOutputRoot class, consolidating all path computation into a single source of truth. Changes: - Update post_build_upload.py to import and use RunOutputRoot - Change upload functions to take RunOutputRoot instead of bucket_uri strings - Replace f-string path construction with RunOutputRoot method calls - Add artifact_index_s3_uri() method to RunOutputRoot for upload paths - Change from wildcard import to explicit imports from github_actions_utils This makes the path layout easier to understand and modify. Future layout changes (e.g., moving artifacts to a subdirectory) only require updating RunOutputRoot methods. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- build_tools/_therock_utils/run_outputs.py | 4 + .../github_actions/post_build_upload.py | 74 ++++++++++--------- 2 files changed, 42 insertions(+), 36 deletions(-) diff --git a/build_tools/_therock_utils/run_outputs.py b/build_tools/_therock_utils/run_outputs.py index 04a700e5255..8098fd190c0 100644 --- a/build_tools/_therock_utils/run_outputs.py +++ b/build_tools/_therock_utils/run_outputs.py @@ -176,6 +176,10 @@ def artifact_index_s3_key(self, artifact_group: str) -> str: """S3 key for the artifact index HTML file.""" return f"{self.prefix}/index-{artifact_group}.html" + def artifact_index_s3_uri(self, artifact_group: str) -> str: + """S3 URI for the artifact index HTML file (for uploads).""" + return f"s3://{self.bucket}/{self.artifact_index_s3_key(artifact_group)}" + def artifact_index_url(self, artifact_group: str) -> str: """Public URL for the artifact index HTML file.""" return f"{self.https_url}/index-{artifact_group}.html" diff --git a/build_tools/github_actions/post_build_upload.py b/build_tools/github_actions/post_build_upload.py index 27034c15ce4..a5cb93d2f0b 100644 --- a/build_tools/github_actions/post_build_upload.py +++ b/build_tools/github_actions/post_build_upload.py @@ -31,11 +31,15 @@ import sys import tarfile -from github_actions_utils import * +from github_actions_utils import gha_append_step_summary, str2bool THEROCK_DIR = Path(__file__).resolve().parent.parent.parent PLATFORM = platform.system().lower() +# Add build_tools to path for _therock_utils imports +sys.path.insert(0, str(THEROCK_DIR / "build_tools")) +from _therock_utils.run_outputs import RunOutputRoot + # Importing indexer.py sys.path.append(str(THEROCK_DIR / "third-party" / "indexer")) import indexer @@ -177,7 +181,7 @@ def index_artifact_files(build_dir: Path): indexer.process_dir(artifacts_dir, indexer_args) -def upload_artifacts(artifact_group: str, build_dir: Path, bucket_uri: str): +def upload_artifacts(artifact_group: str, build_dir: Path, run_root: RunOutputRoot): log("Uploading artifacts to S3") # Uploading artifacts to S3 bucket @@ -186,7 +190,7 @@ def upload_artifacts(artifact_group: str, build_dir: Path, bucket_uri: str): "s3", "cp", str(build_dir / "artifacts"), - bucket_uri, + run_root.s3_uri, "--recursive", "--no-follow-symlinks", "--exclude", @@ -204,13 +208,13 @@ def upload_artifacts(artifact_group: str, build_dir: Path, bucket_uri: str): "s3", "cp", str(build_dir / "artifacts" / "index.html"), - f"{bucket_uri}/index-{artifact_group}.html", + run_root.artifact_index_s3_uri(artifact_group), ] exec(cmd, cwd=Path.cwd()) -def upload_logs_to_s3(artifact_group: str, build_dir: Path, bucket_uri: str): - s3_base_path = f"{bucket_uri}/logs/{artifact_group}" +def upload_logs_to_s3(artifact_group: str, build_dir: Path, run_root: RunOutputRoot): + logs_s3_uri = run_root.logs_s3_uri(artifact_group) log_dir = build_dir / "logs" if not log_dir.is_dir(): @@ -222,60 +226,62 @@ def upload_logs_to_s3(artifact_group: str, build_dir: Path, bucket_uri: str): if not log_files: log("[WARN] No .log or .tar.gz files found. Skipping log upload.") else: - run_aws_cp(log_dir, s3_base_path, content_type="text/plain") + run_aws_cp(log_dir, logs_s3_uri, content_type="text/plain") # Build Time Analysis is only generated on Linux analysis_path = log_dir / "build_time_analysis.html" if analysis_path.is_file(): - analysis_s3_dest = f"{s3_base_path}/build_time_analysis.html" + analysis_s3_dest = f"{logs_s3_uri}/build_time_analysis.html" run_aws_cp(analysis_path, analysis_s3_dest, content_type="text/html") log(f"[INFO] Uploaded {analysis_path} to {analysis_s3_dest}") # Upload index.html index_path = log_dir / "index.html" if index_path.is_file(): - index_s3_dest = f"{s3_base_path}/index.html" + index_s3_dest = f"{logs_s3_uri}/index.html" run_aws_cp(index_path, index_s3_dest, content_type="text/html") log(f"[INFO] Uploaded {index_path} to {index_s3_dest}") else: log(f"[INFO] No index.html found at {log_dir}. Skipping index upload.") -def upload_manifest_to_s3(artifact_group: str, build_dir: Path, bucket_uri: str): - """ - Upload therock_manifest.json to: - /manifests//therock_manifest.json - """ - +def upload_manifest_to_s3( + artifact_group: str, build_dir: Path, run_root: RunOutputRoot +): + """Upload therock_manifest.json to the run outputs directory.""" manifest_path = ( build_dir / "base" / "aux-overlay" / "build" / "therock_manifest.json" ) if not manifest_path.is_file(): raise FileNotFoundError(f"therock_manifest.json not found at {manifest_path}") - dest = f"{bucket_uri}/manifests/{artifact_group}/therock_manifest.json" + dest = run_root.manifest_s3_uri(artifact_group) log(f"[INFO] Uploading manifest {manifest_path} -> {dest}") run_aws_cp(manifest_path, dest, content_type="application/json") -def write_gha_build_summary(artifact_group: str, bucket_url: str, job_status: str): - log(f"Adding links to job summary to bucket {bucket_url}") +def write_gha_build_summary( + artifact_group: str, run_root: RunOutputRoot, job_status: str +): + log(f"Adding links to job summary to {run_root.https_url}") - log_index_url = f"{bucket_url}/logs/{artifact_group}/index.html" - gha_append_step_summary(f"[Build Logs]({log_index_url})") + gha_append_step_summary(f"[Build Logs]({run_root.log_index_url(artifact_group)})") # Build Time Analysis is only generated on Linux if PLATFORM == "linux": - analysis_url = f"{bucket_url}/logs/{artifact_group}/build_time_analysis.html" - gha_append_step_summary(f"[Build Time Analysis]({analysis_url})") + gha_append_step_summary( + f"[Build Time Analysis]({run_root.build_time_analysis_url(artifact_group)})" + ) # Only add artifact links if the job not failed if not job_status or job_status == "success": - artifact_url = f"{bucket_url}/index-{artifact_group}.html" - gha_append_step_summary(f"[Artifacts]({artifact_url})") + gha_append_step_summary( + f"[Artifacts]({run_root.artifact_index_url(artifact_group)})" + ) - manifest_url = f"{bucket_url}/manifests/{artifact_group}/therock_manifest.json" - gha_append_step_summary(f"[TheRock Manifest]({manifest_url})") + gha_append_step_summary( + f"[TheRock Manifest]({run_root.manifest_url(artifact_group)})" + ) def run(args): @@ -294,12 +300,8 @@ def run(args): if not args.upload: return - external_repo_path, bucket = retrieve_bucket_info() - run_id = args.run_id - bucket_uri = f"s3://{bucket}/{external_repo_path}{run_id}-{PLATFORM}" - bucket_url = ( - f"https://{bucket}.s3.amazonaws.com/{external_repo_path}{run_id}-{PLATFORM}" - ) + run_root = RunOutputRoot.from_workflow_run(run_id=args.run_id, platform=PLATFORM) + log(f"Run outputs root: {run_root.s3_uri}") log("Write Windows time sync log") log("----------------------") @@ -309,19 +311,19 @@ def run(args): if not args.job_status or args.job_status == "success": log("Upload build artifacts") log("----------------------") - upload_artifacts(args.artifact_group, args.build_dir, bucket_uri) + upload_artifacts(args.artifact_group, args.build_dir, run_root) log("Upload log") log("----------") - upload_logs_to_s3(args.artifact_group, args.build_dir, bucket_uri) + upload_logs_to_s3(args.artifact_group, args.build_dir, run_root) log("Upload manifest") log("----------------") - upload_manifest_to_s3(args.artifact_group, args.build_dir, bucket_uri) + upload_manifest_to_s3(args.artifact_group, args.build_dir, run_root) log("Write github actions build summary") log("--------------------") - write_gha_build_summary(args.artifact_group, bucket_url, args.job_status) + write_gha_build_summary(args.artifact_group, run_root, args.job_status) if __name__ == "__main__": From fb125a45324bfe691b880a283f7d39d1e8dd3d2d Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Mon, 19 Jan 2026 11:27:32 -0800 Subject: [PATCH 03/17] Migrate artifact_backend.py to use RunOutputRoot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactors S3Backend and LocalDirectoryBackend to use RunOutputRoot for path computation, consolidating all path logic into a single source of truth. Changes: - S3Backend now takes RunOutputRoot instead of bucket/run_id/platform/external_repo - LocalDirectoryBackend now takes RunOutputRoot instead of run_id/platform - LocalDirectoryBackend path pattern fixed to match S3 (removed 'run-' prefix) - create_backend_from_env creates RunOutputRoot first, then passes to backends - bucket and s3_prefix are now properties delegating to run_root This ensures local staging directories use the same layout as S3, making it easier to test and debug artifact handling locally. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../_therock_utils/artifact_backend.py | 76 ++++++++----------- 1 file changed, 33 insertions(+), 43 deletions(-) diff --git a/build_tools/_therock_utils/artifact_backend.py b/build_tools/_therock_utils/artifact_backend.py index 378db6e402f..7631b816288 100644 --- a/build_tools/_therock_utils/artifact_backend.py +++ b/build_tools/_therock_utils/artifact_backend.py @@ -6,6 +6,9 @@ Environment-based switching: - THEROCK_LOCAL_STAGING_DIR set → use LocalDirectoryBackend - Otherwise → use S3Backend with existing retrieve_bucket_info() logic + +Path computation is delegated to RunOutputRoot (see run_outputs.py) to ensure +consistent paths across all tools. """ from abc import ABC, abstractmethod @@ -15,6 +18,8 @@ import os import shutil +from .run_outputs import RunOutputRoot + @dataclass class ArtifactLocation: @@ -84,16 +89,17 @@ class LocalDirectoryBackend(ArtifactBackend): """Backend using a local directory (for testing/prototyping). Directory structure mirrors S3: - {staging_dir}/run-{run_id}-{platform}/ + {staging_dir}/{run_id}-{platform}/ {artifact_name}_{component}_{target_family}.tar.zst {artifact_name}_{component}_{target_family}.tar.zst.sha256sum + + Uses RunOutputRoot for path computation to ensure consistency with S3 layout. """ - def __init__(self, staging_dir: Path, run_id: str, platform: str = "linux"): + def __init__(self, staging_dir: Path, run_root: RunOutputRoot): self.staging_dir = Path(staging_dir) - self.run_id = run_id - self.platform = platform - self.base_path = self.staging_dir / f"run-{run_id}-{platform}" + self.run_root = run_root + self.base_path = run_root.local_path(self.staging_dir) self.base_path.mkdir(parents=True, exist_ok=True) @property @@ -146,29 +152,27 @@ def artifact_exists(self, artifact_key: str) -> bool: class S3Backend(ArtifactBackend): - """Backend using AWS S3 (wraps existing implementation patterns). + """Backend using AWS S3. S3 path structure: - s3://{bucket}/{external_repo}{run_id}-{platform}/ + s3://{bucket}/{prefix}/ {artifact_name}_{component}_{target_family}.tar.zst + + Uses RunOutputRoot for path computation to ensure consistency across tools. """ - def __init__( - self, - bucket: str, - run_id: str, - platform: str = "linux", - external_repo: str = "", - ): - self.bucket = bucket - self.external_repo = external_repo - self.run_id = run_id - self.platform = platform - self.s3_prefix = f"{external_repo}{run_id}-{platform}" - - # Initialize S3 client (lazy, reuse existing patterns) + def __init__(self, run_root: RunOutputRoot): + self.run_root = run_root self._s3_client = None + @property + def bucket(self) -> str: + return self.run_root.bucket + + @property + def s3_prefix(self) -> str: + return self.run_root.prefix + @property def s3_client(self): """Lazy initialization of S3 client.""" @@ -199,7 +203,7 @@ def s3_client(self): @property def base_uri(self) -> str: - return f"s3://{self.bucket}/{self.s3_prefix}" + return self.run_root.s3_uri def list_artifacts(self, name_filter: Optional[str] = None) -> List[str]: """List S3 artifacts.""" @@ -258,10 +262,10 @@ def create_backend_from_env( Environment variables: - THEROCK_LOCAL_STAGING_DIR: If set, use local backend - THEROCK_RUN_ID: Override run ID (default: "local" or GITHUB_RUN_ID) - - THEROCK_PLATFORM: Override platform (default: "linux") + - THEROCK_PLATFORM: Override platform (default: current platform) For S3 backend (when THEROCK_LOCAL_STAGING_DIR is not set): - - Uses existing retrieve_bucket_info() logic + - Uses RunOutputRoot.from_workflow_run() for bucket selection - Respects GITHUB_REPOSITORY, IS_PR_FROM_FORK, etc. """ import platform as platform_module @@ -273,26 +277,12 @@ def create_backend_from_env( run_id = run_id or os.getenv("THEROCK_RUN_ID", os.getenv("GITHUB_RUN_ID", "local")) if local_staging: + run_root = RunOutputRoot.for_local(run_id=run_id, platform=platform_name) return LocalDirectoryBackend( staging_dir=Path(local_staging), - run_id=run_id, - platform=platform_name, + run_root=run_root, ) - # Default to S3 (existing behavior) - # Import here to avoid circular dependency and missing module issues - try: - from .github_actions_utils import retrieve_bucket_info - - external_repo, bucket = retrieve_bucket_info() - except (ImportError, ModuleNotFoundError): - # Fallback for when github_actions_utils is not available - bucket = os.getenv("THEROCK_S3_BUCKET", "therock-ci-artifacts") - external_repo = "" - - return S3Backend( - bucket=bucket, - run_id=run_id, - platform=platform_name, - external_repo=external_repo, - ) + # Default to S3 + run_root = RunOutputRoot.from_workflow_run(run_id=run_id, platform=platform_name) + return S3Backend(run_root=run_root) From 101d770844c98e3e2982f5a4f8a515d49a637412 Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Mon, 19 Jan 2026 11:31:45 -0800 Subject: [PATCH 04/17] Reorder property --- build_tools/_therock_utils/artifact_backend.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/build_tools/_therock_utils/artifact_backend.py b/build_tools/_therock_utils/artifact_backend.py index 7631b816288..b4df474ab8a 100644 --- a/build_tools/_therock_utils/artifact_backend.py +++ b/build_tools/_therock_utils/artifact_backend.py @@ -165,6 +165,10 @@ def __init__(self, run_root: RunOutputRoot): self.run_root = run_root self._s3_client = None + @property + def base_uri(self) -> str: + return self.run_root.s3_uri + @property def bucket(self) -> str: return self.run_root.bucket @@ -201,10 +205,6 @@ def s3_client(self): ) return self._s3_client - @property - def base_uri(self) -> str: - return self.run_root.s3_uri - def list_artifacts(self, name_filter: Optional[str] = None) -> List[str]: """List S3 artifacts.""" paginator = self.s3_client.get_paginator("list_objects_v2") From 159881fd169507982b21e75b4fa2701ba216d7c1 Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Mon, 19 Jan 2026 12:06:22 -0800 Subject: [PATCH 05/17] Add unit tests for RunOutputRoot and update artifact_backend tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds comprehensive tests for the new RunOutputRoot class covering: - Basic properties (prefix, s3_uri, https_url, local_path) - Artifact path methods (s3_key, s3_uri, index_url) - Log path methods (prefix, s3_uri, index_url, build_time_analysis_url) - Manifest path methods (prefix, s3_key, s3_uri, url) - Future output types (python packages, native packages, reports) - Factory methods (for_local) - Immutability verification Updates artifact_backend_test.py to use the new RunOutputRoot-based constructor signatures for S3Backend and LocalDirectoryBackend. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- build_tools/tests/artifact_backend_test.py | 62 +++- build_tools/tests/run_outputs_test.py | 400 +++++++++++++++++++++ 2 files changed, 443 insertions(+), 19 deletions(-) create mode 100644 build_tools/tests/run_outputs_test.py diff --git a/build_tools/tests/artifact_backend_test.py b/build_tools/tests/artifact_backend_test.py index 655e257496f..1f68afb9b55 100644 --- a/build_tools/tests/artifact_backend_test.py +++ b/build_tools/tests/artifact_backend_test.py @@ -16,6 +16,7 @@ S3Backend, create_backend_from_env, ) +from _therock_utils.run_outputs import RunOutputRoot class TestLocalDirectoryBackend(unittest.TestCase): @@ -24,11 +25,16 @@ class TestLocalDirectoryBackend(unittest.TestCase): def setUp(self): """Create a temporary directory for testing.""" self.temp_dir = tempfile.mkdtemp() - self.backend = LocalDirectoryBackend( - staging_dir=Path(self.temp_dir), + self.run_root = RunOutputRoot( + bucket="local", + external_repo="", run_id="test-run-123", platform="linux", ) + self.backend = LocalDirectoryBackend( + staging_dir=Path(self.temp_dir), + run_root=self.run_root, + ) def tearDown(self): """Clean up temporary directory.""" @@ -38,7 +44,8 @@ def tearDown(self): def test_base_uri(self): """Test that base_uri returns the correct path.""" - expected = str(Path(self.temp_dir) / "run-test-run-123-linux") + # Path now matches S3 layout (no "run-" prefix) + expected = str(Path(self.temp_dir) / "test-run-123-linux") self.assertEqual(self.backend.base_uri, expected) def test_base_path_created(self): @@ -189,12 +196,13 @@ class TestS3Backend(unittest.TestCase): def setUp(self): """Set up S3Backend with mocked client.""" - self.backend = S3Backend( + self.run_root = RunOutputRoot( bucket="test-bucket", + external_repo="external/", run_id="test-run-456", platform="linux", - external_repo="external/", ) + self.backend = S3Backend(run_root=self.run_root) def test_base_uri(self): """Test that base_uri returns the correct S3 URI.""" @@ -219,11 +227,13 @@ def test_s3_client_with_credentials(self, mock_boto_client): "AWS_SESSION_TOKEN": "test-token", }, ): - backend = S3Backend( + run_root = RunOutputRoot( bucket="test-bucket", + external_repo="", run_id="test-run", platform="linux", ) + backend = S3Backend(run_root=run_root) # Access client to trigger initialization _ = backend.s3_client @@ -241,11 +251,13 @@ def test_s3_client_without_credentials(self, mock_boto_client): # Clear any existing credentials with mock.patch.dict(os.environ, {}, clear=True): - backend = S3Backend( + run_root = RunOutputRoot( bucket="test-bucket", + external_repo="", run_id="test-run", platform="linux", ) + backend = S3Backend(run_root=run_root) # Access client to trigger initialization _ = backend.s3_client @@ -407,6 +419,10 @@ def test_local_backend_from_env(self): self.assertIsInstance(backend, LocalDirectoryBackend) self.assertIn("env-run-id", backend.base_uri) self.assertIn("windows", backend.base_uri) + # Verify RunOutputRoot is used + self.assertIsInstance(backend.run_root, RunOutputRoot) + self.assertEqual(backend.run_root.run_id, "env-run-id") + self.assertEqual(backend.run_root.platform, "windows") def test_local_backend_with_overrides(self): """Test LocalDirectoryBackend with explicit run_id and platform.""" @@ -424,28 +440,36 @@ def test_local_backend_with_overrides(self): self.assertIsInstance(backend, LocalDirectoryBackend) self.assertIn("override-run", backend.base_uri) self.assertIn("override-platform", backend.base_uri) + # Verify RunOutputRoot is used + self.assertEqual(backend.run_root.run_id, "override-run") + self.assertEqual(backend.run_root.platform, "override-platform") - @mock.patch("_therock_utils.artifact_backend.S3Backend") - def test_s3_backend_when_no_local_dir(self, mock_s3_backend): + @mock.patch("_therock_utils.artifact_backend.RunOutputRoot.from_workflow_run") + def test_s3_backend_when_no_local_dir(self, mock_from_workflow_run): """Test that S3Backend is created when THEROCK_LOCAL_STAGING_DIR is not set.""" + # Create a mock RunOutputRoot + mock_run_root = RunOutputRoot( + bucket="test-bucket", + external_repo="", + run_id="s3-run-id", + platform="linux", + ) + mock_from_workflow_run.return_value = mock_run_root + with mock.patch.dict( os.environ, { - "THEROCK_S3_BUCKET": "test-bucket", "THEROCK_RUN_ID": "s3-run-id", }, clear=True, ): - # Mock the github_actions_utils import to fail - with mock.patch.dict( - sys.modules, {"_therock_utils.github_actions_utils": None} - ): - backend = create_backend_from_env() + backend = create_backend_from_env() - mock_s3_backend.assert_called_once() - call_kwargs = mock_s3_backend.call_args[1] - self.assertEqual(call_kwargs["bucket"], "test-bucket") - self.assertEqual(call_kwargs["run_id"], "s3-run-id") + self.assertIsInstance(backend, S3Backend) + mock_from_workflow_run.assert_called_once() + # Verify the backend uses the RunOutputRoot + self.assertEqual(backend.run_root, mock_run_root) + self.assertEqual(backend.bucket, "test-bucket") if __name__ == "__main__": diff --git a/build_tools/tests/run_outputs_test.py b/build_tools/tests/run_outputs_test.py new file mode 100644 index 00000000000..58229b47eac --- /dev/null +++ b/build_tools/tests/run_outputs_test.py @@ -0,0 +1,400 @@ +#!/usr/bin/env python +"""Unit tests for run_outputs.py (RunOutputRoot).""" + +import os +import sys +import unittest +from pathlib import Path +from unittest import mock + +sys.path.insert(0, os.fspath(Path(__file__).parent.parent)) + +from _therock_utils.run_outputs import RunOutputRoot + + +class TestRunOutputRootProperties(unittest.TestCase): + """Tests for RunOutputRoot basic properties.""" + + def test_prefix_without_external_repo(self): + """Test prefix for main ROCm/TheRock repo (no external_repo).""" + root = RunOutputRoot( + bucket="therock-ci-artifacts", + external_repo="", + run_id="12345678901", + platform="linux", + ) + self.assertEqual(root.prefix, "12345678901-linux") + + def test_prefix_with_external_repo(self): + """Test prefix for external/fork repos.""" + root = RunOutputRoot( + bucket="therock-ci-artifacts-external", + external_repo="ROCm-TheRock/", + run_id="12345678901", + platform="linux", + ) + self.assertEqual(root.prefix, "ROCm-TheRock/12345678901-linux") + + def test_s3_uri(self): + """Test S3 URI construction.""" + root = RunOutputRoot( + bucket="therock-ci-artifacts", + external_repo="", + run_id="12345678901", + platform="linux", + ) + self.assertEqual(root.s3_uri, "s3://therock-ci-artifacts/12345678901-linux") + + def test_s3_uri_with_external_repo(self): + """Test S3 URI with external repo prefix.""" + root = RunOutputRoot( + bucket="therock-ci-artifacts-external", + external_repo="ROCm-TheRock/", + run_id="12345678901", + platform="windows", + ) + self.assertEqual( + root.s3_uri, + "s3://therock-ci-artifacts-external/ROCm-TheRock/12345678901-windows", + ) + + def test_https_url(self): + """Test HTTPS URL construction.""" + root = RunOutputRoot( + bucket="therock-ci-artifacts", + external_repo="", + run_id="12345678901", + platform="linux", + ) + self.assertEqual( + root.https_url, + "https://therock-ci-artifacts.s3.amazonaws.com/12345678901-linux", + ) + + def test_local_path(self): + """Test local path construction.""" + root = RunOutputRoot( + bucket="local", + external_repo="", + run_id="local-123", + platform="linux", + ) + staging_dir = Path("/tmp/staging") + expected = staging_dir / "local-123-linux" + self.assertEqual(root.local_path(staging_dir), expected) + + def test_local_path_with_external_repo(self): + """Test local path with external repo prefix.""" + root = RunOutputRoot( + bucket="local", + external_repo="ROCm-TheRock/", + run_id="12345", + platform="windows", + ) + staging_dir = Path("/tmp/staging") + expected = staging_dir / "ROCm-TheRock" / "12345-windows" + self.assertEqual(root.local_path(staging_dir), expected) + + +class TestRunOutputRootArtifactPaths(unittest.TestCase): + """Tests for artifact-related path methods.""" + + def setUp(self): + self.root = RunOutputRoot( + bucket="therock-ci-artifacts", + external_repo="", + run_id="12345678901", + platform="linux", + ) + + def test_artifact_s3_key(self): + """Test S3 key for artifact file.""" + key = self.root.artifact_s3_key("blas_lib_gfx94X.tar.xz") + self.assertEqual(key, "12345678901-linux/blas_lib_gfx94X.tar.xz") + + def test_artifact_s3_uri(self): + """Test S3 URI for artifact file.""" + uri = self.root.artifact_s3_uri("blas_lib_gfx94X.tar.xz") + self.assertEqual( + uri, "s3://therock-ci-artifacts/12345678901-linux/blas_lib_gfx94X.tar.xz" + ) + + def test_artifact_https_url(self): + """Test HTTPS URL for artifact file.""" + url = self.root.artifact_https_url("blas_lib_gfx94X.tar.xz") + self.assertEqual( + url, + "https://therock-ci-artifacts.s3.amazonaws.com/12345678901-linux/blas_lib_gfx94X.tar.xz", + ) + + def test_artifact_index_s3_key(self): + """Test S3 key for artifact index file.""" + key = self.root.artifact_index_s3_key("gfx94X-dcgpu") + self.assertEqual(key, "12345678901-linux/index-gfx94X-dcgpu.html") + + def test_artifact_index_s3_uri(self): + """Test S3 URI for artifact index file.""" + uri = self.root.artifact_index_s3_uri("gfx94X-dcgpu") + self.assertEqual( + uri, "s3://therock-ci-artifacts/12345678901-linux/index-gfx94X-dcgpu.html" + ) + + def test_artifact_index_url(self): + """Test HTTPS URL for artifact index file.""" + url = self.root.artifact_index_url("gfx94X-dcgpu") + self.assertEqual( + url, + "https://therock-ci-artifacts.s3.amazonaws.com/12345678901-linux/index-gfx94X-dcgpu.html", + ) + + +class TestRunOutputRootLogPaths(unittest.TestCase): + """Tests for log-related path methods.""" + + def setUp(self): + self.root = RunOutputRoot( + bucket="therock-ci-artifacts", + external_repo="", + run_id="12345678901", + platform="linux", + ) + + def test_logs_prefix(self): + """Test S3 prefix for logs directory.""" + prefix = self.root.logs_prefix("gfx94X-dcgpu") + self.assertEqual(prefix, "12345678901-linux/logs/gfx94X-dcgpu") + + def test_logs_s3_uri(self): + """Test S3 URI for logs directory.""" + uri = self.root.logs_s3_uri("gfx94X-dcgpu") + self.assertEqual( + uri, "s3://therock-ci-artifacts/12345678901-linux/logs/gfx94X-dcgpu" + ) + + def test_logs_https_url(self): + """Test HTTPS URL for logs directory.""" + url = self.root.logs_https_url("gfx94X-dcgpu") + self.assertEqual( + url, + "https://therock-ci-artifacts.s3.amazonaws.com/12345678901-linux/logs/gfx94X-dcgpu", + ) + + def test_log_file_s3_key(self): + """Test S3 key for a specific log file.""" + key = self.root.log_file_s3_key("gfx94X-dcgpu", "build.log") + self.assertEqual(key, "12345678901-linux/logs/gfx94X-dcgpu/build.log") + + def test_log_index_url(self): + """Test HTTPS URL for log index.""" + url = self.root.log_index_url("gfx94X-dcgpu") + self.assertEqual( + url, + "https://therock-ci-artifacts.s3.amazonaws.com/12345678901-linux/logs/gfx94X-dcgpu/index.html", + ) + + def test_build_time_analysis_url(self): + """Test HTTPS URL for build time analysis.""" + url = self.root.build_time_analysis_url("gfx94X-dcgpu") + self.assertEqual( + url, + "https://therock-ci-artifacts.s3.amazonaws.com/12345678901-linux/logs/gfx94X-dcgpu/build_time_analysis.html", + ) + + +class TestRunOutputRootManifestPaths(unittest.TestCase): + """Tests for manifest-related path methods.""" + + def setUp(self): + self.root = RunOutputRoot( + bucket="therock-ci-artifacts", + external_repo="", + run_id="12345678901", + platform="linux", + ) + + def test_manifests_prefix(self): + """Test S3 prefix for manifests directory.""" + prefix = self.root.manifests_prefix("gfx94X-dcgpu") + self.assertEqual(prefix, "12345678901-linux/manifests/gfx94X-dcgpu") + + def test_manifest_s3_key(self): + """Test S3 key for manifest file.""" + key = self.root.manifest_s3_key("gfx94X-dcgpu") + self.assertEqual( + key, "12345678901-linux/manifests/gfx94X-dcgpu/therock_manifest.json" + ) + + def test_manifest_s3_uri(self): + """Test S3 URI for manifest file.""" + uri = self.root.manifest_s3_uri("gfx94X-dcgpu") + self.assertEqual( + uri, + "s3://therock-ci-artifacts/12345678901-linux/manifests/gfx94X-dcgpu/therock_manifest.json", + ) + + def test_manifest_url(self): + """Test HTTPS URL for manifest file.""" + url = self.root.manifest_url("gfx94X-dcgpu") + self.assertEqual( + url, + "https://therock-ci-artifacts.s3.amazonaws.com/12345678901-linux/manifests/gfx94X-dcgpu/therock_manifest.json", + ) + + +class TestRunOutputRootFuturePaths(unittest.TestCase): + """Tests for future output types (python packages, native packages, reports).""" + + def setUp(self): + self.root = RunOutputRoot( + bucket="therock-ci-artifacts", + external_repo="", + run_id="12345678901", + platform="linux", + ) + + def test_python_prefix(self): + """Test S3 prefix for Python packages.""" + prefix = self.root.python_prefix("gfx94X-dcgpu") + self.assertEqual(prefix, "12345678901-linux/python/gfx94X-dcgpu") + + def test_python_s3_uri(self): + """Test S3 URI for Python packages directory.""" + uri = self.root.python_s3_uri("gfx94X-dcgpu") + self.assertEqual( + uri, "s3://therock-ci-artifacts/12345678901-linux/python/gfx94X-dcgpu" + ) + + def test_python_package_s3_key(self): + """Test S3 key for a Python package file.""" + key = self.root.python_package_s3_key("gfx94X-dcgpu", "rocm_sdk-1.0.0.whl") + self.assertEqual( + key, "12345678901-linux/python/gfx94X-dcgpu/rocm_sdk-1.0.0.whl" + ) + + def test_packages_prefix(self): + """Test S3 prefix for native packages.""" + prefix = self.root.packages_prefix("gfx94X-dcgpu") + self.assertEqual(prefix, "12345678901-linux/packages/gfx94X-dcgpu") + + def test_packages_s3_uri(self): + """Test S3 URI for native packages directory.""" + uri = self.root.packages_s3_uri("gfx94X-dcgpu") + self.assertEqual( + uri, "s3://therock-ci-artifacts/12345678901-linux/packages/gfx94X-dcgpu" + ) + + def test_native_package_s3_key(self): + """Test S3 key for a native package file.""" + key = self.root.native_package_s3_key("gfx94X-dcgpu", "rocm-sdk_1.0.0.deb") + self.assertEqual( + key, "12345678901-linux/packages/gfx94X-dcgpu/rocm-sdk_1.0.0.deb" + ) + + def test_reports_prefix(self): + """Test S3 prefix for reports.""" + prefix = self.root.reports_prefix("gfx94X-dcgpu") + self.assertEqual(prefix, "12345678901-linux/reports/gfx94X-dcgpu") + + def test_reports_s3_uri(self): + """Test S3 URI for reports directory.""" + uri = self.root.reports_s3_uri("gfx94X-dcgpu") + self.assertEqual( + uri, "s3://therock-ci-artifacts/12345678901-linux/reports/gfx94X-dcgpu" + ) + + def test_report_s3_key(self): + """Test S3 key for a report file.""" + key = self.root.report_s3_key("gfx94X-dcgpu", "test_results.html") + self.assertEqual( + key, "12345678901-linux/reports/gfx94X-dcgpu/test_results.html" + ) + + def test_report_url(self): + """Test HTTPS URL for a report file.""" + url = self.root.report_url("gfx94X-dcgpu", "test_results.html") + self.assertEqual( + url, + "https://therock-ci-artifacts.s3.amazonaws.com/12345678901-linux/reports/gfx94X-dcgpu/test_results.html", + ) + + +class TestRunOutputRootFactoryMethods(unittest.TestCase): + """Tests for RunOutputRoot factory methods.""" + + def test_for_local_default_platform(self): + """Test for_local with default platform detection.""" + root = RunOutputRoot.for_local(run_id="test-123") + self.assertEqual(root.bucket, "local") + self.assertEqual(root.external_repo, "") + self.assertEqual(root.run_id, "test-123") + # Platform should be detected from system + self.assertIn(root.platform, ["linux", "windows", "darwin"]) + + def test_for_local_explicit_platform(self): + """Test for_local with explicit platform.""" + root = RunOutputRoot.for_local(run_id="test-456", platform="windows") + self.assertEqual(root.run_id, "test-456") + self.assertEqual(root.platform, "windows") + self.assertEqual(root.prefix, "test-456-windows") + + def test_for_local_custom_bucket(self): + """Test for_local with custom bucket name.""" + root = RunOutputRoot.for_local( + run_id="test-789", platform="linux", bucket="custom-bucket" + ) + self.assertEqual(root.bucket, "custom-bucket") + + # Note: from_workflow_run tests are skipped because the method uses a relative + # import (from ..github_actions.github_actions_utils) which doesn't work when + # running tests outside the package context. The method is tested indirectly + # via integration tests in artifact_backend_test.py and post_build_upload usage. + + @unittest.skip("Requires package context for relative import") + def test_from_workflow_run_main_repo(self): + """Test from_workflow_run for main ROCm/TheRock repo.""" + pass + + @unittest.skip("Requires package context for relative import") + def test_from_workflow_run_fork(self): + """Test from_workflow_run for fork/external repo.""" + pass + + @unittest.skip("Requires package context for relative import") + def test_from_workflow_run_with_github_repository(self): + """Test from_workflow_run passes github_repository to retrieve_bucket_info.""" + pass + + @unittest.skip("Requires package context for relative import") + def test_from_workflow_run_with_workflow_run_dict(self): + """Test from_workflow_run passes workflow_run dict to retrieve_bucket_info.""" + pass + + +class TestRunOutputRootImmutability(unittest.TestCase): + """Tests that RunOutputRoot is immutable (frozen dataclass).""" + + def test_cannot_modify_bucket(self): + """Test that bucket cannot be modified after creation.""" + root = RunOutputRoot( + bucket="original", + external_repo="", + run_id="123", + platform="linux", + ) + with self.assertRaises(AttributeError): + root.bucket = "modified" + + def test_cannot_modify_run_id(self): + """Test that run_id cannot be modified after creation.""" + root = RunOutputRoot( + bucket="bucket", + external_repo="", + run_id="original", + platform="linux", + ) + with self.assertRaises(AttributeError): + root.run_id = "modified" + + +if __name__ == "__main__": + unittest.main() From 029eab6b7b0d79f55044316b76902ce86afe3020 Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Mon, 19 Jan 2026 12:11:28 -0800 Subject: [PATCH 06/17] Add documentation for CI run outputs layout Documents the S3 directory structure for workflow run outputs including artifacts, logs, manifests, and future output types (python packages, native packages, reports). Covers: - S3 path structure and naming conventions - Multi-platform and multi-group organization - Fork/external repository handling - RunOutputRoot API reference and usage examples Co-Authored-By: Claude --- docs/development/README.md | 1 + docs/development/run_outputs_layout.md | 178 +++++++++++++++++++++++++ 2 files changed, 179 insertions(+) create mode 100644 docs/development/run_outputs_layout.md diff --git a/docs/development/README.md b/docs/development/README.md index 58a7fd5c356..7e92e5dff67 100644 --- a/docs/development/README.md +++ b/docs/development/README.md @@ -14,6 +14,7 @@ - [Dependencies](dependencies.md) - [Development Guide](development_guide.md) - [Installing Artifacts](installing_artifacts.md) +- [Run Outputs Layout](run_outputs_layout.md) - [Sanitizers](sanitizers.md) ### Testing diff --git a/docs/development/run_outputs_layout.md b/docs/development/run_outputs_layout.md new file mode 100644 index 00000000000..7ef3fba4c55 --- /dev/null +++ b/docs/development/run_outputs_layout.md @@ -0,0 +1,178 @@ +# CI Run Outputs Layout + +This document describes the directory structure for workflow run outputs stored in S3. For information about how build artifacts are created locally, see [artifacts.md](artifacts.md). + +## Overview + +A "run output" is anything produced by a CI workflow run: + +- **Build artifacts** - `.tar.xz` or `.tar.zst` archives of compiled components +- **Logs** - Build logs, ninja timing logs, log indexes +- **Manifests** - `therock_manifest.json` describing the build +- **Reports** - Build time analysis, test reports +- **Python packages** - `.whl` files (future) +- **Native packages** - `.deb`, `.rpm` files (future) + +There is a 1:1 mapping between a GitHub Actions workflow run ID and a run outputs directory. + +## S3 Structure + +### Root Path + +Each workflow run has a root directory in S3: + +``` +s3://{bucket}/{external_repo}{run_id}-{platform}/ +``` + +Where: + +| Field | Description | Example | +| --------------- | --------------------------------------------------- | ---------------------- | +| `bucket` | S3 bucket name | `therock-ci-artifacts` | +| `external_repo` | Empty for ROCm/TheRock, `{owner}-{repo}/` for forks | \`\`, `MyOrg-TheRock/` | +| `run_id` | GitHub Actions workflow run ID | `12345678901` | +| `platform` | Build platform | `linux`, `windows` | + +### Directory Layout + +``` +{run_id}-{platform}/ +├── {name}_{component}_{family}.tar.xz # Build artifacts (at root) +├── {name}_{component}_{family}.tar.xz.sha256sum +├── index-{artifact_group}.html # Per-group artifact index +├── logs/{artifact_group}/ +│ ├── *.log # Build logs +│ ├── ninja_logs.tar.gz # Ninja timing logs +│ ├── index.html # Log index +│ └── build_time_analysis.html # Build timing (Linux only) +├── manifests/{artifact_group}/ +│ └── therock_manifest.json # Build manifest +├── python/{artifact_group}/ # [future] +│ ├── *.whl +│ └── *.tar.gz +├── packages/{artifact_group}/ # [future] +│ ├── *.deb +│ └── *.rpm +└── reports/{artifact_group}/ # [future] + └── *.html +``` + +### Naming Conventions + +| Field | Description | Examples | +| ---------------- | ------------------------------ | ----------------------------------------- | +| `name` | Artifact name | `blas`, `core-hip`, `amd-llvm` | +| `component` | Role of files contained | `dev`, `lib`, `run`, `test`, `dbg`, `doc` | +| `family` | Target GPU family or `generic` | `generic`, `gfx94X`, `gfx1100` | +| `artifact_group` | Build variant identifier | `gfx94X-dcgpu`, `gfx110X-all` | + +## Multi-Platform / Multi-Group Organization + +A single workflow run may produce outputs for multiple platforms and artifact groups. Each platform gets its own root directory: + +``` +s3://therock-ci-artifacts/ +├── 12345678901-linux/ +│ ├── *.tar.xz (artifacts for all Linux artifact groups) +│ ├── index-gfx94X-dcgpu.html +│ ├── index-gfx110X-all.html +│ ├── logs/gfx94X-dcgpu/ +│ ├── logs/gfx110X-all/ +│ └── ... +└── 12345678901-windows/ + ├── *.tar.xz (artifacts for all Windows artifact groups) + ├── index-gfx110X-dgpu.html + └── ... +``` + +Multiple CI jobs (different GPU families, build variants) upload to the same run directory, differentiated by `artifact_group` in subdirectory names and index filenames. + +## Fork/External Repository Handling + +Builds from forks or external repositories use a different S3 bucket and include a prefix to namespace their outputs: + +| Source | Bucket | External Repo Prefix | +| ------------- | ------------------------------- | -------------------- | +| ROCm/TheRock | `therock-ci-artifacts` | (empty) | +| Fork/external | `therock-ci-artifacts-external` | `{owner}-{repo}/` | + +Example paths: + +``` +# Main repo +s3://therock-ci-artifacts/12345678901-linux/ + +# Fork (e.g., MyOrg/TheRock) +s3://therock-ci-artifacts-external/MyOrg-TheRock/12345678901-linux/ +``` + +## Public Access + +Artifacts are publicly accessible via HTTPS: + +``` +https://{bucket}.s3.amazonaws.com/{prefix}/... +``` + +Example URLs: + +- Artifact index: `https://therock-ci-artifacts.s3.amazonaws.com/12345678901-linux/index-gfx94X-dcgpu.html` +- Build logs: `https://therock-ci-artifacts.s3.amazonaws.com/12345678901-linux/logs/gfx94X-dcgpu/index.html` +- Manifest: `https://therock-ci-artifacts.s3.amazonaws.com/12345678901-linux/manifests/gfx94X-dcgpu/therock_manifest.json` + +## Using RunOutputRoot + +All path computation should go through the `RunOutputRoot` class in `build_tools/_therock_utils/run_outputs.py`. This ensures consistent paths across all tools. + +### Basic Usage + +```python +from _therock_utils.run_outputs import RunOutputRoot + +# From CI environment (uses GITHUB_REPOSITORY to determine bucket) +root = RunOutputRoot.from_workflow_run( + run_id="12345678901", + platform="linux", +) + +# Access paths +print(root.s3_uri) # s3://bucket/12345678901-linux +print(root.https_url) # https://bucket.s3.amazonaws.com/... +print(root.artifact_index_url("gfx94X-dcgpu")) # ...index-gfx94X-dcgpu.html +print(root.logs_s3_uri("gfx94X-dcgpu")) # s3://bucket/.../logs/gfx94X-dcgpu +print( + root.manifest_url("gfx94X-dcgpu") +) # ...manifests/gfx94X-dcgpu/therock_manifest.json +``` + +### Local Development + +For local testing without S3: + +```python +root = RunOutputRoot.for_local(run_id="local-test", platform="linux") +local_dir = root.local_path(Path("/tmp/staging")) +# Returns: /tmp/staging/local-test-linux/ +``` + +### Available Methods + +| Category | Methods | +| ----------------- | ----------------------------------------------------------------------------------------------------- | +| Root paths | `prefix`, `s3_uri`, `https_url`, `local_path()` | +| Artifacts | `artifact_s3_key()`, `artifact_s3_uri()`, `artifact_https_url()`, `artifact_index_*()` | +| Logs | `logs_prefix()`, `logs_s3_uri()`, `log_file_s3_key()`, `log_index_url()`, `build_time_analysis_url()` | +| Manifests | `manifests_prefix()`, `manifest_s3_key()`, `manifest_s3_uri()`, `manifest_url()` | +| Python (future) | `python_prefix()`, `python_s3_uri()`, `python_package_s3_key()` | +| Packages (future) | `packages_prefix()`, `packages_s3_uri()`, `native_package_s3_key()` | +| Reports (future) | `reports_prefix()`, `reports_s3_uri()`, `report_s3_key()`, `report_url()` | + +## Related Files + +| File | Purpose | +| ---------------------------------------------------- | --------------------------------------------- | +| `build_tools/_therock_utils/run_outputs.py` | `RunOutputRoot` class for path computation | +| `build_tools/_therock_utils/artifact_backend.py` | Storage backend abstraction (S3 or local) | +| `build_tools/github_actions/post_build_upload.py` | Uploads artifacts, logs, manifests to S3 | +| `build_tools/github_actions/github_actions_utils.py` | `retrieve_bucket_info()` for bucket selection | From 689e2076b7fe106e9e3159a92a75d7fc27e39f0f Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Mon, 19 Jan 2026 12:21:45 -0800 Subject: [PATCH 07/17] Move retrieve_bucket_info import to module top level Per Python style guide, imports should be at the top of the file. Add sys.path.insert to enable importing from sibling github_actions package. This also enables the from_workflow_run tests that were previously skipped. Co-Authored-By: Claude --- build_tools/_therock_utils/run_outputs.py | 9 +++- build_tools/tests/run_outputs_test.py | 60 ++++++++++++++++------- 2 files changed, 50 insertions(+), 19 deletions(-) diff --git a/build_tools/_therock_utils/run_outputs.py b/build_tools/_therock_utils/run_outputs.py index 8098fd190c0..54efe2936db 100644 --- a/build_tools/_therock_utils/run_outputs.py +++ b/build_tools/_therock_utils/run_outputs.py @@ -87,10 +87,17 @@ local_dir = root.local_path(Path("/tmp/staging")) """ +import os +import sys from dataclasses import dataclass from pathlib import Path import platform as platform_module +# Add build_tools to path for sibling package imports +sys.path.insert(0, os.fspath(Path(__file__).parent.parent)) + +from github_actions.github_actions_utils import retrieve_bucket_info + @dataclass(frozen=True) class RunOutputRoot: @@ -312,8 +319,6 @@ def from_workflow_run( Returns: RunOutputRoot configured for the workflow run. """ - from ..github_actions.github_actions_utils import retrieve_bucket_info - external_repo, bucket = retrieve_bucket_info( github_repository=github_repository, workflow_run=workflow_run, diff --git a/build_tools/tests/run_outputs_test.py b/build_tools/tests/run_outputs_test.py index 58229b47eac..18af29197f9 100644 --- a/build_tools/tests/run_outputs_test.py +++ b/build_tools/tests/run_outputs_test.py @@ -344,30 +344,56 @@ def test_for_local_custom_bucket(self): ) self.assertEqual(root.bucket, "custom-bucket") - # Note: from_workflow_run tests are skipped because the method uses a relative - # import (from ..github_actions.github_actions_utils) which doesn't work when - # running tests outside the package context. The method is tested indirectly - # via integration tests in artifact_backend_test.py and post_build_upload usage. - - @unittest.skip("Requires package context for relative import") - def test_from_workflow_run_main_repo(self): + @mock.patch("_therock_utils.run_outputs.retrieve_bucket_info") + def test_from_workflow_run_main_repo(self, mock_retrieve): """Test from_workflow_run for main ROCm/TheRock repo.""" - pass + mock_retrieve.return_value = ("", "therock-ci-artifacts") + + root = RunOutputRoot.from_workflow_run(run_id="12345678901", platform="linux") + + self.assertEqual(root.bucket, "therock-ci-artifacts") + self.assertEqual(root.external_repo, "") + self.assertEqual(root.run_id, "12345678901") + self.assertEqual(root.platform, "linux") + self.assertEqual(root.prefix, "12345678901-linux") - @unittest.skip("Requires package context for relative import") - def test_from_workflow_run_fork(self): + @mock.patch("_therock_utils.run_outputs.retrieve_bucket_info") + def test_from_workflow_run_fork(self, mock_retrieve): """Test from_workflow_run for fork/external repo.""" - pass + mock_retrieve.return_value = ("MyOrg-TheRock/", "therock-ci-artifacts-external") + + root = RunOutputRoot.from_workflow_run(run_id="12345678901", platform="windows") - @unittest.skip("Requires package context for relative import") - def test_from_workflow_run_with_github_repository(self): + self.assertEqual(root.bucket, "therock-ci-artifacts-external") + self.assertEqual(root.external_repo, "MyOrg-TheRock/") + self.assertEqual(root.prefix, "MyOrg-TheRock/12345678901-windows") + + @mock.patch("_therock_utils.run_outputs.retrieve_bucket_info") + def test_from_workflow_run_with_github_repository(self, mock_retrieve): """Test from_workflow_run passes github_repository to retrieve_bucket_info.""" - pass + mock_retrieve.return_value = ("", "therock-ci-artifacts") - @unittest.skip("Requires package context for relative import") - def test_from_workflow_run_with_workflow_run_dict(self): + RunOutputRoot.from_workflow_run( + run_id="123", platform="linux", github_repository="ROCm/TheRock" + ) + + mock_retrieve.assert_called_once_with( + github_repository="ROCm/TheRock", workflow_run=None + ) + + @mock.patch("_therock_utils.run_outputs.retrieve_bucket_info") + def test_from_workflow_run_with_workflow_run_dict(self, mock_retrieve): """Test from_workflow_run passes workflow_run dict to retrieve_bucket_info.""" - pass + mock_retrieve.return_value = ("", "therock-ci-artifacts") + workflow_run = {"id": 123, "repository": {"full_name": "ROCm/TheRock"}} + + RunOutputRoot.from_workflow_run( + run_id="123", platform="linux", workflow_run=workflow_run + ) + + mock_retrieve.assert_called_once_with( + github_repository=None, workflow_run=workflow_run + ) class TestRunOutputRootImmutability(unittest.TestCase): From 8d86b4c2756127c60e84aab497b512cf391daad4 Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Mon, 19 Jan 2026 12:42:08 -0800 Subject: [PATCH 08/17] Fix: Pass run_id to retrieve_bucket_info and improve tests Bug fix: - from_workflow_run() now passes run_id as workflow_run_id to retrieve_bucket_info(), which uses it to fetch workflow data from GitHub API when workflow_run dict is not provided. Test improvements: - Removed mocks of retrieve_bucket_info - tests now call the real function for proper integration testing - Tests provide workflow_run dicts to control bucket selection without making API calls - Added test for legacy bucket (pre-cutover date workflows) - Tests now verify actual bucket selection logic: fork detection, external repo handling, date-based bucket selection Co-Authored-By: Claude --- build_tools/_therock_utils/run_outputs.py | 1 + build_tools/tests/run_outputs_test.py | 108 +++++++++++++++------- 2 files changed, 74 insertions(+), 35 deletions(-) diff --git a/build_tools/_therock_utils/run_outputs.py b/build_tools/_therock_utils/run_outputs.py index 54efe2936db..432fed5f708 100644 --- a/build_tools/_therock_utils/run_outputs.py +++ b/build_tools/_therock_utils/run_outputs.py @@ -321,6 +321,7 @@ def from_workflow_run( """ external_repo, bucket = retrieve_bucket_info( github_repository=github_repository, + workflow_run_id=run_id, workflow_run=workflow_run, ) return cls( diff --git a/build_tools/tests/run_outputs_test.py b/build_tools/tests/run_outputs_test.py index 18af29197f9..092c9730980 100644 --- a/build_tools/tests/run_outputs_test.py +++ b/build_tools/tests/run_outputs_test.py @@ -344,12 +344,24 @@ def test_for_local_custom_bucket(self): ) self.assertEqual(root.bucket, "custom-bucket") - @mock.patch("_therock_utils.run_outputs.retrieve_bucket_info") - def test_from_workflow_run_main_repo(self, mock_retrieve): + def test_from_workflow_run_main_repo(self): """Test from_workflow_run for main ROCm/TheRock repo.""" - mock_retrieve.return_value = ("", "therock-ci-artifacts") - - root = RunOutputRoot.from_workflow_run(run_id="12345678901", platform="linux") + # Provide workflow_run dict to avoid API call - this is the standard + # pattern since from_workflow_run always passes run_id to retrieve_bucket_info + workflow_run = { + "id": 12345678901, + "head_repository": {"full_name": "ROCm/TheRock"}, + "updated_at": "2025-12-01T12:00:00Z", # After bucket cutover date + "status": "completed", + "html_url": "https://github.com/ROCm/TheRock/actions/runs/12345678901", + } + + root = RunOutputRoot.from_workflow_run( + run_id="12345678901", + platform="linux", + github_repository="ROCm/TheRock", + workflow_run=workflow_run, + ) self.assertEqual(root.bucket, "therock-ci-artifacts") self.assertEqual(root.external_repo, "") @@ -357,43 +369,69 @@ def test_from_workflow_run_main_repo(self, mock_retrieve): self.assertEqual(root.platform, "linux") self.assertEqual(root.prefix, "12345678901-linux") - @mock.patch("_therock_utils.run_outputs.retrieve_bucket_info") - def test_from_workflow_run_fork(self, mock_retrieve): - """Test from_workflow_run for fork/external repo.""" - mock_retrieve.return_value = ("MyOrg-TheRock/", "therock-ci-artifacts-external") - - root = RunOutputRoot.from_workflow_run(run_id="12345678901", platform="windows") - - self.assertEqual(root.bucket, "therock-ci-artifacts-external") - self.assertEqual(root.external_repo, "MyOrg-TheRock/") - self.assertEqual(root.prefix, "MyOrg-TheRock/12345678901-windows") - - @mock.patch("_therock_utils.run_outputs.retrieve_bucket_info") - def test_from_workflow_run_with_github_repository(self, mock_retrieve): - """Test from_workflow_run passes github_repository to retrieve_bucket_info.""" - mock_retrieve.return_value = ("", "therock-ci-artifacts") - - RunOutputRoot.from_workflow_run( - run_id="123", platform="linux", github_repository="ROCm/TheRock" + def test_from_workflow_run_external_repo(self): + """Test from_workflow_run for external repo (non-TheRock).""" + workflow_run = { + "id": 12345678901, + "head_repository": {"full_name": "SomeOrg/SomeRepo"}, + "updated_at": "2025-12-01T12:00:00Z", + "status": "completed", + "html_url": "https://github.com/SomeOrg/SomeRepo/actions/runs/12345678901", + } + + root = RunOutputRoot.from_workflow_run( + run_id="12345678901", + platform="windows", + github_repository="SomeOrg/SomeRepo", + workflow_run=workflow_run, ) - mock_retrieve.assert_called_once_with( - github_repository="ROCm/TheRock", workflow_run=None + self.assertEqual(root.bucket, "therock-ci-artifacts-external") + self.assertEqual(root.external_repo, "SomeOrg-SomeRepo/") + self.assertEqual(root.prefix, "SomeOrg-SomeRepo/12345678901-windows") + + def test_from_workflow_run_fork(self): + """Test from_workflow_run for fork PR (head_repo != base_repo).""" + # Workflow from fork - head_repository differs from github_repository + workflow_run = { + "id": 12345678901, + "head_repository": {"full_name": "SomeUser/TheRock"}, # Fork + "updated_at": "2025-12-01T12:00:00Z", + "status": "completed", + "html_url": "https://github.com/ROCm/TheRock/actions/runs/12345678901", + } + + root = RunOutputRoot.from_workflow_run( + run_id="12345678901", + platform="linux", + github_repository="ROCm/TheRock", + workflow_run=workflow_run, ) - @mock.patch("_therock_utils.run_outputs.retrieve_bucket_info") - def test_from_workflow_run_with_workflow_run_dict(self, mock_retrieve): - """Test from_workflow_run passes workflow_run dict to retrieve_bucket_info.""" - mock_retrieve.return_value = ("", "therock-ci-artifacts") - workflow_run = {"id": 123, "repository": {"full_name": "ROCm/TheRock"}} + self.assertEqual(root.bucket, "therock-ci-artifacts-external") + self.assertEqual(root.external_repo, "ROCm-TheRock/") + self.assertEqual(root.prefix, "ROCm-TheRock/12345678901-linux") - RunOutputRoot.from_workflow_run( - run_id="123", platform="linux", workflow_run=workflow_run + def test_from_workflow_run_old_bucket(self): + """Test from_workflow_run with old workflow date uses legacy bucket.""" + workflow_run = { + "id": 12345678901, + "head_repository": {"full_name": "ROCm/TheRock"}, + "updated_at": "2025-10-01T12:00:00Z", # Before bucket cutover date + "status": "completed", + "html_url": "https://github.com/ROCm/TheRock/actions/runs/12345678901", + } + + root = RunOutputRoot.from_workflow_run( + run_id="12345678901", + platform="linux", + github_repository="ROCm/TheRock", + workflow_run=workflow_run, ) - mock_retrieve.assert_called_once_with( - github_repository=None, workflow_run=workflow_run - ) + # Old runs use legacy bucket + self.assertEqual(root.bucket, "therock-artifacts") + self.assertEqual(root.external_repo, "") class TestRunOutputRootImmutability(unittest.TestCase): From 6bd01e8f6e8de464b80a5dfd7b0fd1414a1cc54e Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Mon, 19 Jan 2026 12:54:24 -0800 Subject: [PATCH 09/17] Remove [future] API methods from RunOutputRoot Remove unimplemented methods for python packages, native packages, and reports. These can be added when actually needed. Keep documentation section explaining how to add new output types, so the design pattern is clear for future extensions. Co-Authored-By: Claude --- build_tools/_therock_utils/run_outputs.py | 64 +------------------ build_tools/tests/run_outputs_test.py | 77 ----------------------- docs/development/run_outputs_layout.md | 44 ++++++------- 3 files changed, 25 insertions(+), 160 deletions(-) diff --git a/build_tools/_therock_utils/run_outputs.py b/build_tools/_therock_utils/run_outputs.py index 432fed5f708..0bd2944504f 100644 --- a/build_tools/_therock_utils/run_outputs.py +++ b/build_tools/_therock_utils/run_outputs.py @@ -9,8 +9,6 @@ - Logs (.log files, ninja_logs.tar.gz) - Manifests (therock_manifest.json) - Reports (build_time_analysis.html, test reports) -- Python packages (.whl, .tar.gz sdists) [future] -- Native packages (.deb, .rpm) [future] Layout Structure ---------------- @@ -26,14 +24,8 @@ │ ├── ninja_logs.tar.gz # Ninja timing logs │ ├── index.html # Log index │ └── build_time_analysis.html # Build timing (Linux) - ├── manifests/{artifact_group}/ - │ └── therock_manifest.json # Build manifest - ├── python/{artifact_group}/ # Python packages [future] - │ ├── *.whl - │ └── *.tar.gz - └── packages/{artifact_group}/ # Native packages [future] - ├── *.deb - └── *.rpm + └── manifests/{artifact_group}/ + └── therock_manifest.json # Build manifest Where: - {root} = {external_repo}{run_id}-{platform} @@ -239,58 +231,6 @@ def manifest_url(self, artifact_group: str) -> str: """Public URL for therock_manifest.json.""" return f"{self.https_url}/manifests/{artifact_group}/therock_manifest.json" - # ------------------------------------------------------------------------- - # Python packages (.whl, .tar.gz) [future] - # ------------------------------------------------------------------------- - - def python_prefix(self, artifact_group: str) -> str: - """S3 key prefix for Python packages directory.""" - return f"{self.prefix}/python/{artifact_group}" - - def python_s3_uri(self, artifact_group: str) -> str: - """S3 URI for Python packages directory.""" - return f"s3://{self.bucket}/{self.python_prefix(artifact_group)}" - - def python_package_s3_key(self, artifact_group: str, filename: str) -> str: - """S3 key for a Python package file (.whl or .tar.gz).""" - return f"{self.python_prefix(artifact_group)}/{filename}" - - # ------------------------------------------------------------------------- - # Native packages (.deb, .rpm) [future] - # ------------------------------------------------------------------------- - - def packages_prefix(self, artifact_group: str) -> str: - """S3 key prefix for native packages directory.""" - return f"{self.prefix}/packages/{artifact_group}" - - def packages_s3_uri(self, artifact_group: str) -> str: - """S3 URI for native packages directory.""" - return f"s3://{self.bucket}/{self.packages_prefix(artifact_group)}" - - def native_package_s3_key(self, artifact_group: str, filename: str) -> str: - """S3 key for a native package file (.deb, .rpm).""" - return f"{self.packages_prefix(artifact_group)}/{filename}" - - # ------------------------------------------------------------------------- - # Reports (.html) [future] - # ------------------------------------------------------------------------- - - def reports_prefix(self, artifact_group: str) -> str: - """S3 key prefix for reports directory.""" - return f"{self.prefix}/reports/{artifact_group}" - - def reports_s3_uri(self, artifact_group: str) -> str: - """S3 URI for reports directory.""" - return f"s3://{self.bucket}/{self.reports_prefix(artifact_group)}" - - def report_s3_key(self, artifact_group: str, filename: str) -> str: - """S3 key for a report file (.html).""" - return f"{self.reports_prefix(artifact_group)}/{filename}" - - def report_url(self, artifact_group: str, filename: str) -> str: - """Public URL for a report file.""" - return f"{self.https_url}/reports/{artifact_group}/{filename}" - # ------------------------------------------------------------------------- # Factory methods # ------------------------------------------------------------------------- diff --git a/build_tools/tests/run_outputs_test.py b/build_tools/tests/run_outputs_test.py index 092c9730980..5ec0d34e5b4 100644 --- a/build_tools/tests/run_outputs_test.py +++ b/build_tools/tests/run_outputs_test.py @@ -241,83 +241,6 @@ def test_manifest_url(self): ) -class TestRunOutputRootFuturePaths(unittest.TestCase): - """Tests for future output types (python packages, native packages, reports).""" - - def setUp(self): - self.root = RunOutputRoot( - bucket="therock-ci-artifacts", - external_repo="", - run_id="12345678901", - platform="linux", - ) - - def test_python_prefix(self): - """Test S3 prefix for Python packages.""" - prefix = self.root.python_prefix("gfx94X-dcgpu") - self.assertEqual(prefix, "12345678901-linux/python/gfx94X-dcgpu") - - def test_python_s3_uri(self): - """Test S3 URI for Python packages directory.""" - uri = self.root.python_s3_uri("gfx94X-dcgpu") - self.assertEqual( - uri, "s3://therock-ci-artifacts/12345678901-linux/python/gfx94X-dcgpu" - ) - - def test_python_package_s3_key(self): - """Test S3 key for a Python package file.""" - key = self.root.python_package_s3_key("gfx94X-dcgpu", "rocm_sdk-1.0.0.whl") - self.assertEqual( - key, "12345678901-linux/python/gfx94X-dcgpu/rocm_sdk-1.0.0.whl" - ) - - def test_packages_prefix(self): - """Test S3 prefix for native packages.""" - prefix = self.root.packages_prefix("gfx94X-dcgpu") - self.assertEqual(prefix, "12345678901-linux/packages/gfx94X-dcgpu") - - def test_packages_s3_uri(self): - """Test S3 URI for native packages directory.""" - uri = self.root.packages_s3_uri("gfx94X-dcgpu") - self.assertEqual( - uri, "s3://therock-ci-artifacts/12345678901-linux/packages/gfx94X-dcgpu" - ) - - def test_native_package_s3_key(self): - """Test S3 key for a native package file.""" - key = self.root.native_package_s3_key("gfx94X-dcgpu", "rocm-sdk_1.0.0.deb") - self.assertEqual( - key, "12345678901-linux/packages/gfx94X-dcgpu/rocm-sdk_1.0.0.deb" - ) - - def test_reports_prefix(self): - """Test S3 prefix for reports.""" - prefix = self.root.reports_prefix("gfx94X-dcgpu") - self.assertEqual(prefix, "12345678901-linux/reports/gfx94X-dcgpu") - - def test_reports_s3_uri(self): - """Test S3 URI for reports directory.""" - uri = self.root.reports_s3_uri("gfx94X-dcgpu") - self.assertEqual( - uri, "s3://therock-ci-artifacts/12345678901-linux/reports/gfx94X-dcgpu" - ) - - def test_report_s3_key(self): - """Test S3 key for a report file.""" - key = self.root.report_s3_key("gfx94X-dcgpu", "test_results.html") - self.assertEqual( - key, "12345678901-linux/reports/gfx94X-dcgpu/test_results.html" - ) - - def test_report_url(self): - """Test HTTPS URL for a report file.""" - url = self.root.report_url("gfx94X-dcgpu", "test_results.html") - self.assertEqual( - url, - "https://therock-ci-artifacts.s3.amazonaws.com/12345678901-linux/reports/gfx94X-dcgpu/test_results.html", - ) - - class TestRunOutputRootFactoryMethods(unittest.TestCase): """Tests for RunOutputRoot factory methods.""" diff --git a/docs/development/run_outputs_layout.md b/docs/development/run_outputs_layout.md index 7ef3fba4c55..507b68cc8b7 100644 --- a/docs/development/run_outputs_layout.md +++ b/docs/development/run_outputs_layout.md @@ -10,8 +10,6 @@ A "run output" is anything produced by a CI workflow run: - **Logs** - Build logs, ninja timing logs, log indexes - **Manifests** - `therock_manifest.json` describing the build - **Reports** - Build time analysis, test reports -- **Python packages** - `.whl` files (future) -- **Native packages** - `.deb`, `.rpm` files (future) There is a 1:1 mapping between a GitHub Actions workflow run ID and a run outputs directory. @@ -46,16 +44,8 @@ Where: │ ├── ninja_logs.tar.gz # Ninja timing logs │ ├── index.html # Log index │ └── build_time_analysis.html # Build timing (Linux only) -├── manifests/{artifact_group}/ -│ └── therock_manifest.json # Build manifest -├── python/{artifact_group}/ # [future] -│ ├── *.whl -│ └── *.tar.gz -├── packages/{artifact_group}/ # [future] -│ ├── *.deb -│ └── *.rpm -└── reports/{artifact_group}/ # [future] - └── *.html +└── manifests/{artifact_group}/ + └── therock_manifest.json # Build manifest ``` ### Naming Conventions @@ -158,15 +148,27 @@ local_dir = root.local_path(Path("/tmp/staging")) ### Available Methods -| Category | Methods | -| ----------------- | ----------------------------------------------------------------------------------------------------- | -| Root paths | `prefix`, `s3_uri`, `https_url`, `local_path()` | -| Artifacts | `artifact_s3_key()`, `artifact_s3_uri()`, `artifact_https_url()`, `artifact_index_*()` | -| Logs | `logs_prefix()`, `logs_s3_uri()`, `log_file_s3_key()`, `log_index_url()`, `build_time_analysis_url()` | -| Manifests | `manifests_prefix()`, `manifest_s3_key()`, `manifest_s3_uri()`, `manifest_url()` | -| Python (future) | `python_prefix()`, `python_s3_uri()`, `python_package_s3_key()` | -| Packages (future) | `packages_prefix()`, `packages_s3_uri()`, `native_package_s3_key()` | -| Reports (future) | `reports_prefix()`, `reports_s3_uri()`, `report_s3_key()`, `report_url()` | +| Category | Methods | +| --------- | ----------------------------------------------------------------------------------------------------- | +| Root | `prefix`, `s3_uri`, `https_url`, `local_path()` | +| Artifacts | `artifact_s3_key()`, `artifact_s3_uri()`, `artifact_https_url()`, `artifact_index_*()` | +| Logs | `logs_prefix()`, `logs_s3_uri()`, `log_file_s3_key()`, `log_index_url()`, `build_time_analysis_url()` | +| Manifests | `manifests_prefix()`, `manifest_s3_key()`, `manifest_s3_uri()`, `manifest_url()` | + +### Adding New Output Types + +To add a new output type (e.g., Python packages, native packages, reports): + +1. Add methods to `RunOutputRoot` following the existing pattern: + + - `{type}_prefix(artifact_group)` - S3 key prefix + - `{type}_s3_uri(artifact_group)` - Full S3 URI + - `{type}_s3_key(artifact_group, filename)` - S3 key for specific file + - `{type}_url(artifact_group, filename)` - Public HTTPS URL (if applicable) + +1. Update the upload script to use the new methods + +1. Update this documentation ## Related Files From 66982ad8c6c60ece2e5d6f8d92c3ca7848d06aa4 Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Mon, 19 Jan 2026 13:22:28 -0800 Subject: [PATCH 10/17] Formating tweaks to docs --- docs/development/run_outputs_layout.md | 65 ++++++++++++++------------ 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/docs/development/run_outputs_layout.md b/docs/development/run_outputs_layout.md index 507b68cc8b7..8163996726d 100644 --- a/docs/development/run_outputs_layout.md +++ b/docs/development/run_outputs_layout.md @@ -1,6 +1,7 @@ # CI Run Outputs Layout -This document describes the directory structure for workflow run outputs stored in S3. For information about how build artifacts are created locally, see [artifacts.md](artifacts.md). +This document describes the directory structure for workflow run outputs stored in S3. +For information about how build artifacts are created locally, see [artifacts.md](artifacts.md). ## Overview @@ -8,10 +9,17 @@ A "run output" is anything produced by a CI workflow run: - **Build artifacts** - `.tar.xz` or `.tar.zst` archives of compiled components - **Logs** - Build logs, ninja timing logs, log indexes -- **Manifests** - `therock_manifest.json` describing the build +- **Manifests** - `therock_manifest.json` containing source control metadata - **Reports** - Build time analysis, test reports +- *(and other files which may be added later)* -There is a 1:1 mapping between a GitHub Actions workflow run ID and a run outputs directory. +Each job for a given workflow run uploads its outputs to a central directory and +there is a 1:1 mapping between a workflow run ID and a run outputs directory. + +> [!NOTE] +> As our current CI/CD system is built on GitHub Actions, we use the "workflow run" +> and "workflow run id" terminology from GitHub Actions, but this system could also +> work using a different CI/CD system. ## S3 Structure @@ -25,12 +33,12 @@ s3://{bucket}/{external_repo}{run_id}-{platform}/ Where: -| Field | Description | Example | -| --------------- | --------------------------------------------------- | ---------------------- | -| `bucket` | S3 bucket name | `therock-ci-artifacts` | -| `external_repo` | Empty for ROCm/TheRock, `{owner}-{repo}/` for forks | \`\`, `MyOrg-TheRock/` | -| `run_id` | GitHub Actions workflow run ID | `12345678901` | -| `platform` | Build platform | `linux`, `windows` | +| Field | Description | Example | +| --------------- | --------------------------------------------------- | ------------------------- | +| `bucket` | S3 bucket name | `therock-ci-artifacts` | +| `external_repo` | Empty for ROCm/TheRock, `{owner}-{repo}/` for forks | (empty), `MyOrg-TheRock/` | +| `run_id` | GitHub Actions workflow run ID | `12345678901` | +| `platform` | Build platform | `linux`, `windows` | ### Directory Layout @@ -59,7 +67,8 @@ Where: ## Multi-Platform / Multi-Group Organization -A single workflow run may produce outputs for multiple platforms and artifact groups. Each platform gets its own root directory: +A single workflow run may produce outputs for multiple platforms and artifact groups. +Each platform gets its own root directory: ``` s3://therock-ci-artifacts/ @@ -76,11 +85,14 @@ s3://therock-ci-artifacts/ └── ... ``` -Multiple CI jobs (different GPU families, build variants) upload to the same run directory, differentiated by `artifact_group` in subdirectory names and index filenames. +Multiple CI jobs (different GPU families, build variants) upload to the same run +directory, differentiated by `artifact_group` in subdirectory names and index +filenames. ## Fork/External Repository Handling -Builds from forks or external repositories use a different S3 bucket and include a prefix to namespace their outputs: +Builds from forks or external repositories use a different S3 bucket and include +a prefix to namespace their outputs: | Source | Bucket | External Repo Prefix | | ------------- | ------------------------------- | -------------------- | @@ -113,15 +125,17 @@ Example URLs: ## Using RunOutputRoot -All path computation should go through the `RunOutputRoot` class in `build_tools/_therock_utils/run_outputs.py`. This ensures consistent paths across all tools. +All path computation should go through the `RunOutputRoot` class in +`build_tools/_therock_utils/run_outputs.py`. This ensures consistent paths +across all tools. ### Basic Usage ```python from _therock_utils.run_outputs import RunOutputRoot -# From CI environment (uses GITHUB_REPOSITORY to determine bucket) root = RunOutputRoot.from_workflow_run( + github_repository="ROCm/TheRock", # Or omit to infer from GITHUB_REPOSITORY run_id="12345678901", platform="linux", ) @@ -146,20 +160,11 @@ local_dir = root.local_path(Path("/tmp/staging")) # Returns: /tmp/staging/local-test-linux/ ``` -### Available Methods - -| Category | Methods | -| --------- | ----------------------------------------------------------------------------------------------------- | -| Root | `prefix`, `s3_uri`, `https_url`, `local_path()` | -| Artifacts | `artifact_s3_key()`, `artifact_s3_uri()`, `artifact_https_url()`, `artifact_index_*()` | -| Logs | `logs_prefix()`, `logs_s3_uri()`, `log_file_s3_key()`, `log_index_url()`, `build_time_analysis_url()` | -| Manifests | `manifests_prefix()`, `manifest_s3_key()`, `manifest_s3_uri()`, `manifest_url()` | - ### Adding New Output Types To add a new output type (e.g., Python packages, native packages, reports): -1. Add methods to `RunOutputRoot` following the existing pattern: +1. Add methods to `RunOutputRoot` following the existing patterns: - `{type}_prefix(artifact_group)` - S3 key prefix - `{type}_s3_uri(artifact_group)` - Full S3 URI @@ -172,9 +177,9 @@ To add a new output type (e.g., Python packages, native packages, reports): ## Related Files -| File | Purpose | -| ---------------------------------------------------- | --------------------------------------------- | -| `build_tools/_therock_utils/run_outputs.py` | `RunOutputRoot` class for path computation | -| `build_tools/_therock_utils/artifact_backend.py` | Storage backend abstraction (S3 or local) | -| `build_tools/github_actions/post_build_upload.py` | Uploads artifacts, logs, manifests to S3 | -| `build_tools/github_actions/github_actions_utils.py` | `retrieve_bucket_info()` for bucket selection | +| File | Purpose | +| ----------------------------------------------------------------------------------------------------------- | --------------------------------------------- | +| [`build_tools/_therock_utils/run_outputs.py`](/build_tools/_therock_utils/run_outputs.py) | `RunOutputRoot` class for path computation | +| [`build_tools/_therock_utils/artifact_backend.py`](/build_tools/_therock_utils/artifact_backend.py) | Storage backend abstraction (S3 or local) | +| [`build_tools/github_actions/post_build_upload.py`](/build_tools/github_actions/post_build_upload.py) | Uploads artifacts, logs, manifests to S3 | +| [`build_tools/github_actions/github_actions_utils.py`](/build_tools/github_actions/github_actions_utils.py) | `retrieve_bucket_info()` for bucket selection | From e3805b6ebef6dc57b756bc07c518aeb75322a463 Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Mon, 19 Jan 2026 13:24:19 -0800 Subject: [PATCH 11/17] Add S3 bucket listing with browse links to documentation Add new "S3 Buckets" subsection with table of all buckets used for CI artifacts, including browse links to the public S3 bucket indexes. Includes both current and legacy buckets. Co-Authored-By: Claude --- docs/development/run_outputs_layout.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/development/run_outputs_layout.md b/docs/development/run_outputs_layout.md index 8163996726d..99108219f81 100644 --- a/docs/development/run_outputs_layout.md +++ b/docs/development/run_outputs_layout.md @@ -23,6 +23,24 @@ there is a 1:1 mapping between a workflow run ID and a run outputs directory. ## S3 Structure +### S3 Buckets + +Run outputs are stored in public S3 buckets. The bucket used depends on the source repository and release type: + +| Bucket | Purpose | Browse | +| ------------------------------- | ------------------------------------------ | ----------------------------------------------------------------- | +| `therock-ci-artifacts` | Main CI artifacts from ROCm/TheRock | [Browse](https://therock-ci-artifacts.s3.amazonaws.com/) | +| `therock-ci-artifacts-external` | CI artifacts from forks and external repos | [Browse](https://therock-ci-artifacts-external.s3.amazonaws.com/) | +| `therock-nightly-artifacts` | Nightly release builds | [Browse](https://therock-nightly-artifacts.s3.amazonaws.com/) | +| `therock-release-artifacts` | Official release builds | [Browse](https://therock-release-artifacts.s3.amazonaws.com/) | + +Legacy buckets (for older workflow runs): + +| Bucket | Purpose | Browse | +| ---------------------------- | ------------------------------ | -------------------------------------------------------------- | +| `therock-artifacts` | Legacy main CI artifacts | [Browse](https://therock-artifacts.s3.amazonaws.com/) | +| `therock-artifacts-external` | Legacy external repo artifacts | [Browse](https://therock-artifacts-external.s3.amazonaws.com/) | + ### Root Path Each workflow run has a root directory in S3: From 5a70de46ce90fcfc1787fe5e165c3bc33bef14b3 Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Mon, 19 Jan 2026 15:16:12 -0800 Subject: [PATCH 12/17] Migrate fetch_artifacts.py and upload_test_report_script.py to RunOutputRoot - upload_test_report_script.py: Use RunOutputRoot.from_workflow_run() instead of retrieve_bucket_info() + manual URI construction - fetch_artifacts.py: Remove BucketMetadata class (duplicated RunOutputRoot functionality), use RunOutputRoot directly This consolidates all bucket/path computation through RunOutputRoot, leaving retrieve_bucket_info() as an internal implementation detail used only by run_outputs.py. Co-Authored-By: Claude --- build_tools/fetch_artifacts.py | 51 +++++-------------- .../upload_test_report_script.py | 16 +++--- 2 files changed, 22 insertions(+), 45 deletions(-) diff --git a/build_tools/fetch_artifacts.py b/build_tools/fetch_artifacts.py index 981b7af8aac..6055253238f 100644 --- a/build_tools/fetch_artifacts.py +++ b/build_tools/fetch_artifacts.py @@ -36,7 +36,7 @@ from botocore import UNSIGNED from botocore.config import Config import concurrent.futures -from dataclasses import dataclass, field +from dataclasses import dataclass import os from pathlib import Path import platform @@ -53,7 +53,7 @@ ArtifactPopulator, _open_archive_for_read, ) -from github_actions.github_actions_utils import retrieve_bucket_info +from _therock_utils.run_outputs import RunOutputRoot warnings.filterwarnings("ignore", category=InsecureRequestWarning) @@ -88,29 +88,13 @@ def log(*args, **kwargs): sys.stdout.flush() -# TODO: move into github_actions_utils.py? -@dataclass -class BucketMetadata: - """Metadata for a workflow run's artifacts in an AWS S3 bucket.""" - - external_repo: str - bucket: str - workflow_run_id: str - platform: str - s3_key_path: str = field(init=False) - - def __post_init__(self): - self.s3_key_path = f"{self.external_repo}{self.workflow_run_id}-{self.platform}" - - -def list_s3_artifacts(bucket_info: BucketMetadata, artifact_group: str) -> set[str]: +def list_s3_artifacts(run_root: RunOutputRoot, artifact_group: str) -> set[str]: """Checks that the AWS S3 bucket exists and returns artifact names.""" - s3_key_path = bucket_info.s3_key_path log( - f"Retrieving S3 artifacts for {bucket_info.workflow_run_id} in '{bucket_info.bucket}' at '{s3_key_path}'" + f"Retrieving S3 artifacts for {run_root.run_id} in '{run_root.bucket}' at '{run_root.prefix}'" ) - page_iterator = paginator.paginate(Bucket=bucket_info.bucket, Prefix=s3_key_path) + page_iterator = paginator.paginate(Bucket=run_root.bucket, Prefix=run_root.prefix) data = set() for page in page_iterator: if not "Contents" in page: @@ -128,7 +112,7 @@ def list_s3_artifacts(bucket_info: BucketMetadata, artifact_group: str) -> set[s file_name = artifact_key.split("/")[-1] data.add(file_name) if not data: - log(f"Found no S3 artifacts for {bucket_info.run_id} at '{s3_key_path}'") + log(f"Found no S3 artifacts for {run_root.run_id} at '{run_root.prefix}'") return data @@ -214,7 +198,7 @@ def download_artifacts(artifact_download_requests: list[ArtifactDownloadRequest] def get_artifact_download_requests( - bucket_info: BucketMetadata, + run_root: RunOutputRoot, s3_artifacts: set[str], output_dir: Path, ) -> list[ArtifactDownloadRequest]: @@ -224,8 +208,8 @@ def get_artifact_download_requests( for artifact in sorted(list(s3_artifacts)): artifacts_to_download.append( ArtifactDownloadRequest( - artifact_key=f"{bucket_info.s3_key_path}/{artifact}", - bucket=bucket_info.bucket, + artifact_key=f"{run_root.prefix}/{artifact}", + bucket=run_root.bucket, output_path=output_dir / artifact, ) ) @@ -279,24 +263,17 @@ def run(args): output_dir = args.output_dir output_dir.mkdir(parents=True, exist_ok=True) - external_repo, bucket = retrieve_bucket_info( - github_repository=run_github_repo, - workflow_run_id=run_id, - ) - bucket_info = BucketMetadata( - external_repo=external_repo, - bucket=bucket, - workflow_run_id=run_id, + run_root = RunOutputRoot.from_workflow_run( + run_id=run_id, platform=args.platform, + github_repository=run_github_repo, ) # Lookup which artifacts exist in the bucket. # Note: this currently does not check that all requested artifacts # (via include patterns) do exist, so this may silently fail to fetch # expected files. - s3_artifacts = list_s3_artifacts( - bucket_info=bucket_info, artifact_group=artifact_group - ) + s3_artifacts = list_s3_artifacts(run_root=run_root, artifact_group=artifact_group) if not s3_artifacts: log(f"No matching artifacts for {run_id} exist. Exiting...") sys.exit(1) @@ -308,7 +285,7 @@ def run(args): sys.exit(1) artifacts_to_download = get_artifact_download_requests( - bucket_info=bucket_info, + run_root=run_root, s3_artifacts=s3_artifacts_filtered, output_dir=output_dir, ) diff --git a/build_tools/github_actions/upload_test_report_script.py b/build_tools/github_actions/upload_test_report_script.py index 3082c92f062..53f99a63b48 100644 --- a/build_tools/github_actions/upload_test_report_script.py +++ b/build_tools/github_actions/upload_test_report_script.py @@ -11,14 +11,16 @@ import shlex import subprocess import sys -from github_actions.github_actions_utils import retrieve_bucket_info - - -logging.basicConfig(level=logging.INFO) THEROCK_DIR = Path(__file__).resolve().parent.parent.parent PLATFORM = platform.system().lower() +# Add build_tools to path for _therock_utils imports +sys.path.insert(0, str(THEROCK_DIR / "build_tools")) +from _therock_utils.run_outputs import RunOutputRoot + +logging.basicConfig(level=logging.INFO) + # Importing indexer.py sys.path.append(str(THEROCK_DIR / "third-party" / "indexer")) from indexer import process_dir @@ -83,9 +85,7 @@ def upload_test_report(report_dir: Path, bucket_uri: str, log_destination: str): def run(args: argparse.Namespace): - external_repo_path, bucket = retrieve_bucket_info() - run_id = args.run_id - bucket_uri = f"s3://{bucket}/{external_repo_path}{run_id}-{PLATFORM}" + run_root = RunOutputRoot.from_workflow_run(run_id=args.run_id, platform=PLATFORM) if not args.report_path.exists(): logging.error( @@ -94,7 +94,7 @@ def run(args: argparse.Namespace): return create_index_file(args) - upload_test_report(args.report_path, bucket_uri, args.log_destination) + upload_test_report(args.report_path, run_root.s3_uri, args.log_destination) def main(argv): From f6e601324f729b0083e79971311bee040445a5bc Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Mon, 19 Jan 2026 15:38:30 -0800 Subject: [PATCH 13/17] Consolidate bucket selection logic into RunOutputRoot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move retrieve_bucket_info() from github_actions_utils.py into run_outputs.py as a private _retrieve_bucket_info() function. This makes RunOutputRoot the single source of truth for all S3 path and bucket computation. Changes: - Move _retrieve_bucket_info() into run_outputs.py - Delete retrieve_bucket_info() from github_actions_utils.py - Migrate tests to run_outputs_test.py (test through public API) - Add RELEASE_TYPE env var tests for nightly/release buckets - Add integration tests with @skipUnless(GITHUB_TOKEN) - Keep github_actions_utils.py for generic GitHub API code only 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- build_tools/_therock_utils/run_outputs.py | 113 +++++++++- .../github_actions/github_actions_utils.py | 107 --------- .../tests/github_actions_utils_test.py | 204 +----------------- build_tools/tests/run_outputs_test.py | 146 +++++++++++++ 4 files changed, 261 insertions(+), 309 deletions(-) diff --git a/build_tools/_therock_utils/run_outputs.py b/build_tools/_therock_utils/run_outputs.py index 0bd2944504f..f269e5f8e5b 100644 --- a/build_tools/_therock_utils/run_outputs.py +++ b/build_tools/_therock_utils/run_outputs.py @@ -82,13 +82,116 @@ import os import sys from dataclasses import dataclass +from datetime import datetime, timezone from pathlib import Path import platform as platform_module # Add build_tools to path for sibling package imports sys.path.insert(0, os.fspath(Path(__file__).parent.parent)) -from github_actions.github_actions_utils import retrieve_bucket_info +from github_actions.github_actions_utils import gha_query_workflow_run_by_id + + +def _log(*args, **kwargs): + """Log to stdout with flush for CI visibility.""" + print(*args, **kwargs) + sys.stdout.flush() + + +# Cutover date for bucket naming change (TheRock #2046). +# Workflows before this date used therock-artifacts, after use therock-ci-artifacts. +_BUCKET_CUTOVER_DATE = datetime.fromisoformat("2025-11-11T16:18:48+00:00") + + +def _retrieve_bucket_info( + github_repository: str | None = None, + workflow_run_id: str | None = None, + workflow_run: dict | None = None, +) -> tuple[str, str]: + """Determine S3 bucket and external_repo prefix for a workflow run. + + This is an internal implementation detail - use RunOutputRoot.from_workflow_run() + instead of calling this directly. + + Bucket selection is based on: + - Repository (ROCm/TheRock vs external) + - Whether the PR is from a fork + - RELEASE_TYPE environment variable (nightly, release) + - Workflow run date (for legacy bucket compatibility) + + Args: + github_repository: Repository in 'owner/repo' format. If None, + uses GITHUB_REPOSITORY env var or defaults to 'ROCm/TheRock'. + workflow_run_id: Workflow run ID. If provided without workflow_run, + fetches workflow data from GitHub API. + workflow_run: Workflow run dict from GitHub API. If provided, + avoids an API call. + + Returns: + Tuple of (external_repo, bucket) where: + - external_repo: '' for ROCm/TheRock, or '{owner}-{repo}/' for forks/external + - bucket: S3 bucket name + """ + _log("Retrieving bucket info...") + + if github_repository: + _log(f" (explicit) github_repository: {github_repository}") + else: + github_repository = os.getenv("GITHUB_REPOSITORY", "ROCm/TheRock") + _log(f" (implicit) github_repository: {github_repository}") + + # Fetch workflow_run from API if not provided but workflow_run_id is set + if workflow_run is None and workflow_run_id is not None: + workflow_run = gha_query_workflow_run_by_id(github_repository, workflow_run_id) + + # Extract metadata from workflow_run if available + curr_commit_dt = None + if workflow_run is not None: + _log(f" workflow_run_id : {workflow_run['id']}") + head_github_repository = workflow_run["head_repository"]["full_name"] + is_pr_from_fork = head_github_repository != github_repository + _log(f" head_github_repository : {head_github_repository}") + _log(f" is_pr_from_fork : {is_pr_from_fork}") + + curr_commit_dt = datetime.strptime( + workflow_run["updated_at"], "%Y-%m-%dT%H:%M:%SZ" + ) + curr_commit_dt = curr_commit_dt.replace(tzinfo=timezone.utc) + else: + is_pr_from_fork = os.getenv("IS_PR_FROM_FORK", "false") == "true" + _log(f" (implicit) is_pr_from_fork : {is_pr_from_fork}") + + owner, repo_name = github_repository.split("/") + external_repo = ( + "" + if repo_name == "TheRock" and owner == "ROCm" and not is_pr_from_fork + else f"{owner}-{repo_name}/" + ) + + release_type = os.getenv("RELEASE_TYPE") + if release_type: + _log(f" (implicit) RELEASE_TYPE: {release_type}") + bucket = f"therock-{release_type}-artifacts" + else: + if external_repo == "": + bucket = "therock-ci-artifacts" + if curr_commit_dt and curr_commit_dt <= _BUCKET_CUTOVER_DATE: + bucket = "therock-artifacts" + elif ( + repo_name == "therock-releases-internal" + and owner == "ROCm" + and not is_pr_from_fork + ): + bucket = "therock-artifacts-internal" + else: + bucket = "therock-ci-artifacts-external" + if curr_commit_dt and curr_commit_dt <= _BUCKET_CUTOVER_DATE: + bucket = "therock-artifacts-external" + + _log("Retrieved bucket info:") + _log(f" external_repo: {external_repo}") + _log(f" bucket : {bucket}") + return (external_repo, bucket) @dataclass(frozen=True) @@ -245,8 +348,8 @@ def from_workflow_run( ) -> "RunOutputRoot": """Create from workflow context. - Uses retrieve_bucket_info() to determine the appropriate S3 bucket - and external_repo prefix based on the repository and workflow run. + Determines the appropriate S3 bucket and external_repo prefix based + on the repository and workflow run metadata. Args: run_id: GitHub Actions workflow run ID. @@ -254,12 +357,12 @@ def from_workflow_run( github_repository: Repository in 'owner/repo' format. If None, uses GITHUB_REPOSITORY env var or defaults to 'ROCm/TheRock'. workflow_run: Optional workflow run dict from GitHub API. If - provided, avoids an API call in retrieve_bucket_info(). + provided, avoids a GitHub API call. Returns: RunOutputRoot configured for the workflow run. """ - external_repo, bucket = retrieve_bucket_info( + external_repo, bucket = _retrieve_bucket_info( github_repository=github_repository, workflow_run_id=run_id, workflow_run=workflow_run, diff --git a/build_tools/github_actions/github_actions_utils.py b/build_tools/github_actions/github_actions_utils.py index df91ef93dae..ef3a4c2dea0 100644 --- a/build_tools/github_actions/github_actions_utils.py +++ b/build_tools/github_actions/github_actions_utils.py @@ -3,7 +3,6 @@ See also https://pypi.org/project/github-action-utils/. """ -from datetime import datetime, timezone import json import os from pathlib import Path @@ -226,112 +225,6 @@ def gha_query_last_successful_workflow_run( return None -def retrieve_bucket_info( - github_repository: str | None = None, - workflow_run_id: str | None = None, - workflow_run: dict | None = None, -) -> tuple[str, str]: - """Given a github repository and a workflow run, retrieves bucket information. - - This is intended to segment artifacts by repository and trust level, with - artifacts split across several buckets: - * therock-ci-artifacts - * therock-ci-artifacts-external - * therock-artifacts-internal - * therock-dev-artifacts - * therock-nightly-artifacts - * therock-release-artifacts - - Typically while run as a continious CI/CD pipeline, this function should - return the same bucket information for each stage of the pipeline. While - testing workflows, it can be useful to reference artifacts from other - repositories and arbitrary buckets to avoid rebuilding. Those test cases - can use the explicit |github_repository| and |workflow_run_id| parameters. - - Priority for |github_repository| is: - 1. The function argument - 2. The GITHUB_REPOSITORY environment variable - 3. The default, "ROCm/TheRock" - - If |workflow_run| is provided, uses it directly without making an API call. - Otherwise, if |workflow_run_id| is provided, fetches the workflow run data - from the GitHub API. - - If neither is provided, |is_pr_from_fork| is populated from the - IS_PR_FROM_FORK environment variable. - - Returns a tuple [EXTERNAL_REPO, BUCKET], where: - - EXTERNAL_REPO = if CI is run on an external repo, we create a S3 sub-folder - to avoid conflicting run IDs - - BUCKET = the name of the S3 bucket - """ - - _log("Retrieving bucket info...") - - if github_repository: - _log(f" (explicit) github_repository: {github_repository}") - else: - # Default to the current repository (if any), else ROCm/TheRock. - github_repository = os.getenv("GITHUB_REPOSITORY", "ROCm/TheRock") - _log(f" (implicit) github_repository: {github_repository}") - - # Fetch workflow_run from API if not provided but workflow_run_id is set - if workflow_run is None and workflow_run_id is not None: - workflow_run = gha_query_workflow_run_by_id(github_repository, workflow_run_id) - - # Extract metadata from workflow_run if available - curr_commit_dt = None - if workflow_run is not None: - _log(f" workflow_run_id : {workflow_run['id']}") - head_github_repository = workflow_run["head_repository"]["full_name"] - is_pr_from_fork = head_github_repository != github_repository - _log(f" head_github_repository : {head_github_repository}") - _log(f" is_pr_from_fork : {is_pr_from_fork}") - - # From TheRock #2046 onward, a new S3 bucket was used. - # This datetime comparison will determine whether to download from older bucket or newer bucket. - curr_commit_dt = datetime.strptime( - workflow_run["updated_at"], "%Y-%m-%dT%H:%M:%SZ" - ) - curr_commit_dt = curr_commit_dt.replace(tzinfo=timezone.utc) - commit_to_compare_dt = datetime.fromisoformat("2025-11-11T16:18:48+00:00") - else: - is_pr_from_fork = os.getenv("IS_PR_FROM_FORK", "false") == "true" - _log(f" (implicit) is_pr_from_fork : {is_pr_from_fork}") - - owner, repo_name = github_repository.split("/") - external_repo = ( - "" - if repo_name == "TheRock" and owner == "ROCm" and not is_pr_from_fork - else f"{owner}-{repo_name}/" - ) - - release_type = os.getenv("RELEASE_TYPE") - if release_type: - _log(f" (implicit) RELEASE_TYPE: {release_type}") - bucket = f"therock-{release_type}-artifacts" - else: - if external_repo == "": - bucket = "therock-ci-artifacts" - if curr_commit_dt and curr_commit_dt <= commit_to_compare_dt: - bucket = "therock-artifacts" - elif ( - repo_name == "therock-releases-internal" - and owner == "ROCm" - and not is_pr_from_fork - ): - bucket = "therock-artifacts-internal" - else: - bucket = "therock-ci-artifacts-external" - if curr_commit_dt and curr_commit_dt <= commit_to_compare_dt: - bucket = "therock-artifacts-external" - - _log("Retrieved bucket info:") - _log(f" external_repo: {external_repo}") - _log(f" bucket : {bucket}") - return (external_repo, bucket) - - def str2bool(value: str | None) -> bool: """Convert environment variables to boolean values.""" if not value: diff --git a/build_tools/github_actions/tests/github_actions_utils_test.py b/build_tools/github_actions/tests/github_actions_utils_test.py index 4f27ffe13cd..60f97130555 100644 --- a/build_tools/github_actions/tests/github_actions_utils_test.py +++ b/build_tools/github_actions/tests/github_actions_utils_test.py @@ -2,34 +2,18 @@ from pathlib import Path import sys import unittest -from unittest.mock import patch sys.path.insert(0, os.fspath(Path(__file__).parent.parent)) -from github_actions_utils import * +from github_actions_utils import ( + gha_query_workflow_run_by_id, + gha_query_workflow_runs_for_commit, + gha_query_last_successful_workflow_run, +) # Note: these tests use the network and require GITHUB_TOKEN to avoid rate limits. class GitHubActionsUtilsTest(unittest.TestCase): - def setUp(self): - # Save environment state - self._saved_env = {} - for key in ["RELEASE_TYPE", "GITHUB_REPOSITORY", "IS_PR_FROM_FORK"]: - if key in os.environ: - self._saved_env[key] = os.environ[key] - # Clean environment for tests - for key in ["RELEASE_TYPE", "GITHUB_REPOSITORY", "IS_PR_FROM_FORK"]: - if key in os.environ: - del os.environ[key] - - def tearDown(self): - # Restore environment state - for key in ["RELEASE_TYPE", "GITHUB_REPOSITORY", "IS_PR_FROM_FORK"]: - if key in os.environ: - del os.environ[key] - for key, value in self._saved_env.items(): - os.environ[key] = value - @unittest.skipUnless( os.getenv("GITHUB_TOKEN"), "GITHUB_TOKEN not set, skipping test that requires GitHub API access", @@ -39,7 +23,7 @@ def test_gha_query_workflow_run_by_id(self): workflow_run = gha_query_workflow_run_by_id("ROCm/TheRock", "18022609292") self.assertEqual(workflow_run["repository"]["full_name"], "ROCm/TheRock") - # Verify fields we depend on in retrieve_bucket_info and find_artifacts_for_commit + # Verify fields we depend on in RunOutputRoot and find_artifacts_for_commit self.assertIn("id", workflow_run) self.assertIn("head_repository", workflow_run) self.assertIn("full_name", workflow_run["head_repository"]) @@ -69,7 +53,7 @@ def test_gha_query_workflow_runs_for_commit_found(self): self.assertIsInstance(runs, list) self.assertGreater(len(runs), 0) - # Verify fields we depend on in retrieve_bucket_info and find_artifacts_for_commit + # Verify fields we depend on in RunOutputRoot and find_artifacts_for_commit run = runs[0] self.assertIn("id", run) self.assertIn("head_repository", run) @@ -117,180 +101,6 @@ def test_gha_query_last_successful_workflow_run(self): "ROCm/TheRock", "nonexistent_workflow_12345.yml", "main" ) - @unittest.skipUnless( - os.getenv("GITHUB_TOKEN"), - "GITHUB_TOKEN not set, skipping test that requires GitHub API access", - ) - def test_retrieve_older_bucket_info(self): - # TODO(geomin12): work on pulling these run IDs more dynamically - # https://github.com/ROCm/TheRock/actions/runs/18022609292?pr=1597 - external_repo, bucket = retrieve_bucket_info("ROCm/TheRock", "18022609292") - self.assertEqual(external_repo, "") - self.assertEqual(bucket, "therock-artifacts") - - @unittest.skipUnless( - os.getenv("GITHUB_TOKEN"), - "GITHUB_TOKEN not set, skipping test that requires GitHub API access", - ) - def test_retrieve_newer_bucket_info(self): - # https://github.com/ROCm/TheRock/actions/runs/19680190301 - external_repo, bucket = retrieve_bucket_info("ROCm/TheRock", "19680190301") - self.assertEqual(external_repo, "") - self.assertEqual(bucket, "therock-ci-artifacts") - - @unittest.skipUnless( - os.getenv("GITHUB_TOKEN"), - "GITHUB_TOKEN not set, skipping test that requires GitHub API access", - ) - def test_retrieve_bucket_info_from_fork(self): - # https://github.com/ROCm/TheRock/actions/runs/18023442478?pr=1596 - external_repo, bucket = retrieve_bucket_info("ROCm/TheRock", "18023442478") - self.assertEqual(external_repo, "ROCm-TheRock/") - self.assertEqual(bucket, "therock-artifacts-external") - - @unittest.skipUnless( - os.getenv("GITHUB_TOKEN"), - "GITHUB_TOKEN not set, skipping test that requires GitHub API access", - ) - def test_retrieve_bucket_info_from_rocm_libraries(self): - # https://github.com/ROCm/rocm-libraries/actions/runs/18020401326?pr=1828 - external_repo, bucket = retrieve_bucket_info( - "ROCm/rocm-libraries", "18020401326" - ) - self.assertEqual(external_repo, "ROCm-rocm-libraries/") - self.assertEqual(bucket, "therock-artifacts-external") - - @unittest.skipUnless( - os.getenv("GITHUB_TOKEN"), - "GITHUB_TOKEN not set, skipping test that requires GitHub API access", - ) - def test_retrieve_newer_bucket_info_from_rocm_libraries(self): - # https://github.com/ROCm/rocm-libraries/actions/runs/19784318631 - external_repo, bucket = retrieve_bucket_info( - "ROCm/rocm-libraries", "19784318631" - ) - self.assertEqual(external_repo, "ROCm-rocm-libraries/") - self.assertEqual(bucket, "therock-ci-artifacts-external") - - @unittest.skipUnless( - os.getenv("GITHUB_TOKEN"), - "GITHUB_TOKEN not set, skipping test that requires GitHub API access", - ) - def test_retrieve_bucket_info_for_release(self): - # https://github.com/ROCm/TheRock/actions/runs/19157864140 - os.environ["RELEASE_TYPE"] = "nightly" - external_repo, bucket = retrieve_bucket_info("ROCm/TheRock", "19157864140") - self.assertEqual(external_repo, "") - self.assertEqual(bucket, "therock-nightly-artifacts") - - def test_retrieve_bucket_info_without_workflow_id(self): - """Test bucket info retrieval without making API calls.""" - # Test default case (no workflow_run_id, no API call) - os.environ["GITHUB_REPOSITORY"] = "ROCm/TheRock" - os.environ["IS_PR_FROM_FORK"] = "false" - external_repo, bucket = retrieve_bucket_info() - self.assertEqual(external_repo, "") - self.assertEqual(bucket, "therock-ci-artifacts") - - # Test external repo case - os.environ["GITHUB_REPOSITORY"] = "SomeOrg/SomeRepo" - external_repo, bucket = retrieve_bucket_info() - self.assertEqual(external_repo, "SomeOrg-SomeRepo/") - self.assertEqual(bucket, "therock-ci-artifacts-external") - - # Test fork case - os.environ["GITHUB_REPOSITORY"] = "ROCm/TheRock" - os.environ["IS_PR_FROM_FORK"] = "true" - external_repo, bucket = retrieve_bucket_info() - self.assertEqual(external_repo, "ROCm-TheRock/") - self.assertEqual(bucket, "therock-ci-artifacts-external") - - # Test release case - os.environ["RELEASE_TYPE"] = "nightly" - os.environ["IS_PR_FROM_FORK"] = "false" - external_repo, bucket = retrieve_bucket_info() - self.assertEqual(external_repo, "") - self.assertEqual(bucket, "therock-nightly-artifacts") - - def test_retrieve_bucket_info_with_workflow_run_skips_api_call(self): - """Test that providing workflow_run skips the API call.""" - # Mock workflow_run data matching the structure from GitHub API - mock_workflow_run = { - "id": 12345678901, - "head_repository": {"full_name": "ROCm/TheRock"}, - "updated_at": "2025-12-01T12:00:00Z", # After the bucket cutover date - "status": "completed", - "html_url": "https://github.com/ROCm/TheRock/actions/runs/12345678901", - } - - with patch("github_actions_utils.gha_send_request") as mock_send_request, patch( - "github_actions_utils.gha_query_workflow_run_by_id" - ) as mock_query_by_id: - external_repo, bucket = retrieve_bucket_info( - github_repository="ROCm/TheRock", - workflow_run=mock_workflow_run, - ) - - # Verify no API calls were made - mock_send_request.assert_not_called() - mock_query_by_id.assert_not_called() - - # Verify correct bucket info based on mock data - self.assertEqual(external_repo, "") - self.assertEqual(bucket, "therock-ci-artifacts") - - def test_retrieve_bucket_info_with_workflow_run_from_fork(self): - """Test workflow_run from a fork returns external bucket.""" - mock_workflow_run = { - "id": 12345678901, - "head_repository": {"full_name": "SomeUser/TheRock"}, # Fork - "updated_at": "2025-12-01T12:00:00Z", - "status": "completed", - "html_url": "https://github.com/ROCm/TheRock/actions/runs/12345678901", - } - - with patch("github_actions_utils.gha_send_request") as mock_send_request, patch( - "github_actions_utils.gha_query_workflow_run_by_id" - ) as mock_query_by_id: - external_repo, bucket = retrieve_bucket_info( - github_repository="ROCm/TheRock", - workflow_run=mock_workflow_run, - ) - - # Verify no API calls were made - mock_send_request.assert_not_called() - mock_query_by_id.assert_not_called() - - # Fork PRs go to external bucket with repo prefix - self.assertEqual(external_repo, "ROCm-TheRock/") - self.assertEqual(bucket, "therock-ci-artifacts-external") - - def test_retrieve_bucket_info_with_workflow_run_old_date(self): - """Test workflow_run with old date returns legacy bucket.""" - mock_workflow_run = { - "id": 12345678901, - "head_repository": {"full_name": "ROCm/TheRock"}, - "updated_at": "2025-10-01T12:00:00Z", # Before the bucket cutover date - "status": "completed", - "html_url": "https://github.com/ROCm/TheRock/actions/runs/12345678901", - } - - with patch("github_actions_utils.gha_send_request") as mock_send_request, patch( - "github_actions_utils.gha_query_workflow_run_by_id" - ) as mock_query_by_id: - external_repo, bucket = retrieve_bucket_info( - github_repository="ROCm/TheRock", - workflow_run=mock_workflow_run, - ) - - # Verify no API calls were made - mock_send_request.assert_not_called() - mock_query_by_id.assert_not_called() - - # Old runs use legacy bucket - self.assertEqual(external_repo, "") - self.assertEqual(bucket, "therock-artifacts") - if __name__ == "__main__": unittest.main() diff --git a/build_tools/tests/run_outputs_test.py b/build_tools/tests/run_outputs_test.py index 5ec0d34e5b4..435267b86b7 100644 --- a/build_tools/tests/run_outputs_test.py +++ b/build_tools/tests/run_outputs_test.py @@ -383,5 +383,151 @@ def test_cannot_modify_run_id(self): root.run_id = "modified" +class TestRunOutputRootReleaseType(unittest.TestCase): + """Tests for RELEASE_TYPE environment variable handling.""" + + def setUp(self): + """Save and clean environment state.""" + self._saved_env = {} + for key in ["RELEASE_TYPE", "GITHUB_REPOSITORY", "IS_PR_FROM_FORK"]: + if key in os.environ: + self._saved_env[key] = os.environ[key] + del os.environ[key] + + def tearDown(self): + """Restore environment state.""" + for key in ["RELEASE_TYPE", "GITHUB_REPOSITORY", "IS_PR_FROM_FORK"]: + if key in os.environ: + del os.environ[key] + for key, value in self._saved_env.items(): + os.environ[key] = value + + def test_nightly_release_bucket(self): + """Test that RELEASE_TYPE=nightly uses nightly bucket.""" + os.environ["RELEASE_TYPE"] = "nightly" + workflow_run = { + "id": 12345678901, + "head_repository": {"full_name": "ROCm/TheRock"}, + "updated_at": "2025-12-01T12:00:00Z", + "status": "completed", + "html_url": "https://github.com/ROCm/TheRock/actions/runs/12345678901", + } + + root = RunOutputRoot.from_workflow_run( + run_id="12345678901", + platform="linux", + github_repository="ROCm/TheRock", + workflow_run=workflow_run, + ) + + self.assertEqual(root.bucket, "therock-nightly-artifacts") + self.assertEqual(root.external_repo, "") + + def test_release_bucket(self): + """Test that RELEASE_TYPE=release uses release bucket.""" + os.environ["RELEASE_TYPE"] = "release" + workflow_run = { + "id": 12345678901, + "head_repository": {"full_name": "ROCm/TheRock"}, + "updated_at": "2025-12-01T12:00:00Z", + "status": "completed", + "html_url": "https://github.com/ROCm/TheRock/actions/runs/12345678901", + } + + root = RunOutputRoot.from_workflow_run( + run_id="12345678901", + platform="linux", + github_repository="ROCm/TheRock", + workflow_run=workflow_run, + ) + + self.assertEqual(root.bucket, "therock-release-artifacts") + self.assertEqual(root.external_repo, "") + + +class TestRunOutputRootIntegration(unittest.TestCase): + """Integration tests that use real GitHub API calls. + + These tests require GITHUB_TOKEN to be set and make network requests. + They verify that from_workflow_run() correctly determines bucket info + for known historical workflow runs. + """ + + @unittest.skipUnless( + os.getenv("GITHUB_TOKEN"), + "GITHUB_TOKEN not set, skipping integration test", + ) + def test_from_workflow_run_older_bucket(self): + """Test workflow run from before bucket cutover uses legacy bucket.""" + # https://github.com/ROCm/TheRock/actions/runs/18022609292?pr=1597 + root = RunOutputRoot.from_workflow_run( + run_id="18022609292", + platform="linux", + github_repository="ROCm/TheRock", + ) + self.assertEqual(root.external_repo, "") + self.assertEqual(root.bucket, "therock-artifacts") + + @unittest.skipUnless( + os.getenv("GITHUB_TOKEN"), + "GITHUB_TOKEN not set, skipping integration test", + ) + def test_from_workflow_run_newer_bucket(self): + """Test workflow run from after bucket cutover uses new bucket.""" + # https://github.com/ROCm/TheRock/actions/runs/19680190301 + root = RunOutputRoot.from_workflow_run( + run_id="19680190301", + platform="linux", + github_repository="ROCm/TheRock", + ) + self.assertEqual(root.external_repo, "") + self.assertEqual(root.bucket, "therock-ci-artifacts") + + @unittest.skipUnless( + os.getenv("GITHUB_TOKEN"), + "GITHUB_TOKEN not set, skipping integration test", + ) + def test_from_workflow_run_fork(self): + """Test workflow run from fork uses external bucket with repo prefix.""" + # https://github.com/ROCm/TheRock/actions/runs/18023442478?pr=1596 + root = RunOutputRoot.from_workflow_run( + run_id="18023442478", + platform="linux", + github_repository="ROCm/TheRock", + ) + self.assertEqual(root.external_repo, "ROCm-TheRock/") + self.assertEqual(root.bucket, "therock-artifacts-external") + + @unittest.skipUnless( + os.getenv("GITHUB_TOKEN"), + "GITHUB_TOKEN not set, skipping integration test", + ) + def test_from_workflow_run_external_repo_older(self): + """Test workflow run from external repo (rocm-libraries) older bucket.""" + # https://github.com/ROCm/rocm-libraries/actions/runs/18020401326?pr=1828 + root = RunOutputRoot.from_workflow_run( + run_id="18020401326", + platform="linux", + github_repository="ROCm/rocm-libraries", + ) + self.assertEqual(root.external_repo, "ROCm-rocm-libraries/") + self.assertEqual(root.bucket, "therock-artifacts-external") + + @unittest.skipUnless( + os.getenv("GITHUB_TOKEN"), + "GITHUB_TOKEN not set, skipping integration test", + ) + def test_from_workflow_run_external_repo_newer(self): + """Test workflow run from external repo (rocm-libraries) newer bucket.""" + # https://github.com/ROCm/rocm-libraries/actions/runs/19784318631 + root = RunOutputRoot.from_workflow_run( + run_id="19784318631", + platform="linux", + github_repository="ROCm/rocm-libraries", + ) + self.assertEqual(root.external_repo, "ROCm-rocm-libraries/") + self.assertEqual(root.bucket, "therock-ci-artifacts-external") + + if __name__ == "__main__": unittest.main() From d2f3ad80fe96ce5dbc1b9be2b79bf0bf6f764c8b Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Mon, 19 Jan 2026 15:49:39 -0800 Subject: [PATCH 14/17] Fix outdated reference in run_outputs_layout.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update Related Files table to reflect that github_actions_utils.py now contains generic GitHub API utilities, not retrieve_bucket_info() which was moved to run_outputs.py. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- docs/development/run_outputs_layout.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/development/run_outputs_layout.md b/docs/development/run_outputs_layout.md index 99108219f81..d9f43aa6717 100644 --- a/docs/development/run_outputs_layout.md +++ b/docs/development/run_outputs_layout.md @@ -200,4 +200,4 @@ To add a new output type (e.g., Python packages, native packages, reports): | [`build_tools/_therock_utils/run_outputs.py`](/build_tools/_therock_utils/run_outputs.py) | `RunOutputRoot` class for path computation | | [`build_tools/_therock_utils/artifact_backend.py`](/build_tools/_therock_utils/artifact_backend.py) | Storage backend abstraction (S3 or local) | | [`build_tools/github_actions/post_build_upload.py`](/build_tools/github_actions/post_build_upload.py) | Uploads artifacts, logs, manifests to S3 | -| [`build_tools/github_actions/github_actions_utils.py`](/build_tools/github_actions/github_actions_utils.py) | `retrieve_bucket_info()` for bucket selection | +| [`build_tools/github_actions/github_actions_utils.py`](/build_tools/github_actions/github_actions_utils.py) | GitHub API utilities (workflow queries, etc.) | From cd0e4091e3872561508b83db72a55256c0849e81 Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Mon, 19 Jan 2026 16:26:31 -0800 Subject: [PATCH 15/17] Update tests to use RunOutputRoot interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tests were using the old BucketMetadata and LocalDirectoryBackend interfaces that were updated in the RunOutputRoot migration. Changes: - fetch_artifacts_test.py: Replace BucketMetadata with RunOutputRoot - artifact_manager_tool_test.py: Update LocalDirectoryBackend calls to use RunOutputRoot instead of separate run_id/platform parameters 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../tests/artifact_manager_tool_test.py | 28 +++++++++++++++---- build_tools/tests/fetch_artifacts_test.py | 20 ++++++++----- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/build_tools/tests/artifact_manager_tool_test.py b/build_tools/tests/artifact_manager_tool_test.py index 2484e08be98..b4daf022e8d 100644 --- a/build_tools/tests/artifact_manager_tool_test.py +++ b/build_tools/tests/artifact_manager_tool_test.py @@ -17,6 +17,7 @@ sys.path.insert(0, os.fspath(Path(__file__).parent.parent)) from _therock_utils.artifact_backend import ArtifactBackend, LocalDirectoryBackend +from _therock_utils.run_outputs import RunOutputRoot # Minimal topology TOML for testing push/fetch behavior. # Defines two stages: upstream-stage produces artifacts, downstream-stage consumes them. @@ -90,11 +91,16 @@ def __init__( # Use a real backend for successful operations if staging_dir: - self._real_backend = LocalDirectoryBackend( - staging_dir=staging_dir, + run_root = RunOutputRoot( + bucket="local", + external_repo="", run_id=run_id, platform=platform, ) + self._real_backend = LocalDirectoryBackend( + staging_dir=staging_dir, + run_root=run_root, + ) else: self._real_backend = None @@ -208,11 +214,16 @@ def _create_staged_artifact( self, name: str, component: str, target_family: str, run_id: str = "local" ) -> str: """Create a fake artifact in the staging directory.""" - backend = LocalDirectoryBackend( - staging_dir=self.staging_dir, + run_root = RunOutputRoot( + bucket="local", + external_repo="", run_id=run_id, platform=TEST_PLATFORM, ) + backend = LocalDirectoryBackend( + staging_dir=self.staging_dir, + run_root=run_root, + ) archive_name = f"{name}_{component}_{target_family}.tar.zst" temp_archive = Path(self.temp_dir) / archive_name @@ -318,11 +329,16 @@ def test_push_succeeds_when_all_uploads_succeed(self): artifact_manager.main(argv) # Verify artifacts were uploaded - backend = LocalDirectoryBackend( - staging_dir=self.staging_dir, + run_root = RunOutputRoot( + bucket="local", + external_repo="", run_id="local", platform=TEST_PLATFORM, ) + backend = LocalDirectoryBackend( + staging_dir=self.staging_dir, + run_root=run_root, + ) self.assertTrue(backend.artifact_exists("test-artifact_lib_generic.tar.zst")) # Verify sha256sum was also uploaded diff --git a/build_tools/tests/fetch_artifacts_test.py b/build_tools/tests/fetch_artifacts_test.py index 2322226f7b1..8a32f625e04 100644 --- a/build_tools/tests/fetch_artifacts_test.py +++ b/build_tools/tests/fetch_artifacts_test.py @@ -8,10 +8,10 @@ sys.path.insert(0, os.fspath(Path(__file__).parent.parent)) from fetch_artifacts import ( - BucketMetadata, list_s3_artifacts, filter_artifacts, ) +from _therock_utils.run_outputs import RunOutputRoot THIS_DIR = Path(__file__).resolve().parent REPO_DIR = THIS_DIR.parent.parent @@ -20,8 +20,11 @@ class ArtifactsIndexPageTest(unittest.TestCase): @patch("fetch_artifacts.paginator") def testListS3Artifacts_Found(self, mock_paginator): - bucket_info = BucketMetadata( - "ROCm-TheRock/", "therock-ci-artifacts", "123", "linux" + run_root = RunOutputRoot( + bucket="therock-ci-artifacts", + external_repo="ROCm-TheRock/", + run_id="123", + platform="linux", ) mock_paginator.paginate.return_value = [ { @@ -35,7 +38,7 @@ def testListS3Artifacts_Found(self, mock_paginator): {"Contents": [{"Key": "rocm-libraries/test/empty_4test.tar.xz"}]}, ] - result = list_s3_artifacts(bucket_info, "test") + result = list_s3_artifacts(run_root, "test") self.assertEqual(len(result), 4) self.assertTrue("empty_1test.tar.xz" in result) @@ -45,8 +48,11 @@ def testListS3Artifacts_Found(self, mock_paginator): @patch("fetch_artifacts.paginator") def testListS3Artifacts_NotFound(self, mock_paginator): - bucket_info = BucketMetadata( - "ROCm-TheRock/", "therock-ci-artifacts", "123", "linux" + run_root = RunOutputRoot( + bucket="therock-ci-artifacts", + external_repo="ROCm-TheRock/", + run_id="123", + platform="linux", ) mock_paginator.paginate.side_effect = ClientError( error_response={ @@ -56,7 +62,7 @@ def testListS3Artifacts_NotFound(self, mock_paginator): ) with self.assertRaises(ClientError) as context: - list_s3_artifacts(bucket_info, "test") + list_s3_artifacts(run_root, "test") self.assertEqual(context.exception.response["Error"]["Code"], "AccessDenied") From b93fdf772f7612c5b642f31432a320395fbc453f Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Tue, 20 Jan 2026 09:43:07 -0800 Subject: [PATCH 16/17] Update run_outputs_test.py to support gh CLI authentication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Integration tests now use the _skip_unless_authenticated_github_api_is_available decorator instead of checking only for GITHUB_TOKEN. This allows the tests to run when authenticated via the gh CLI, matching the pattern in github_actions_utils_test.py. Changes: - Import is_authenticated_github_api_available from github_actions_utils - Add _skip_unless_authenticated_github_api_is_available decorator - Update all 5 integration tests to use the new decorator - Update class docstring to reflect both auth methods 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- build_tools/tests/run_outputs_test.py | 43 +++++++++++++-------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/build_tools/tests/run_outputs_test.py b/build_tools/tests/run_outputs_test.py index 435267b86b7..572a6c4e4a0 100644 --- a/build_tools/tests/run_outputs_test.py +++ b/build_tools/tests/run_outputs_test.py @@ -10,6 +10,18 @@ sys.path.insert(0, os.fspath(Path(__file__).parent.parent)) from _therock_utils.run_outputs import RunOutputRoot +from github_actions.github_actions_utils import is_authenticated_github_api_available + + +def _skip_unless_authenticated_github_api_is_available(test_func): + """Decorator to skip tests unless GitHub API is available. + + Checks for GITHUB_TOKEN env var or authenticated gh CLI. + """ + return unittest.skipUnless( + is_authenticated_github_api_available(), + "No authenticated GitHub API auth available (need GITHUB_TOKEN or authenticated gh CLI)", + )(test_func) class TestRunOutputRootProperties(unittest.TestCase): @@ -448,15 +460,12 @@ def test_release_bucket(self): class TestRunOutputRootIntegration(unittest.TestCase): """Integration tests that use real GitHub API calls. - These tests require GITHUB_TOKEN to be set and make network requests. - They verify that from_workflow_run() correctly determines bucket info - for known historical workflow runs. + These tests require authenticated GitHub API access (GITHUB_TOKEN or gh CLI) + and make network requests. They verify that from_workflow_run() correctly + determines bucket info for known historical workflow runs. """ - @unittest.skipUnless( - os.getenv("GITHUB_TOKEN"), - "GITHUB_TOKEN not set, skipping integration test", - ) + @_skip_unless_authenticated_github_api_is_available def test_from_workflow_run_older_bucket(self): """Test workflow run from before bucket cutover uses legacy bucket.""" # https://github.com/ROCm/TheRock/actions/runs/18022609292?pr=1597 @@ -468,10 +477,7 @@ def test_from_workflow_run_older_bucket(self): self.assertEqual(root.external_repo, "") self.assertEqual(root.bucket, "therock-artifacts") - @unittest.skipUnless( - os.getenv("GITHUB_TOKEN"), - "GITHUB_TOKEN not set, skipping integration test", - ) + @_skip_unless_authenticated_github_api_is_available def test_from_workflow_run_newer_bucket(self): """Test workflow run from after bucket cutover uses new bucket.""" # https://github.com/ROCm/TheRock/actions/runs/19680190301 @@ -483,10 +489,7 @@ def test_from_workflow_run_newer_bucket(self): self.assertEqual(root.external_repo, "") self.assertEqual(root.bucket, "therock-ci-artifacts") - @unittest.skipUnless( - os.getenv("GITHUB_TOKEN"), - "GITHUB_TOKEN not set, skipping integration test", - ) + @_skip_unless_authenticated_github_api_is_available def test_from_workflow_run_fork(self): """Test workflow run from fork uses external bucket with repo prefix.""" # https://github.com/ROCm/TheRock/actions/runs/18023442478?pr=1596 @@ -498,10 +501,7 @@ def test_from_workflow_run_fork(self): self.assertEqual(root.external_repo, "ROCm-TheRock/") self.assertEqual(root.bucket, "therock-artifacts-external") - @unittest.skipUnless( - os.getenv("GITHUB_TOKEN"), - "GITHUB_TOKEN not set, skipping integration test", - ) + @_skip_unless_authenticated_github_api_is_available def test_from_workflow_run_external_repo_older(self): """Test workflow run from external repo (rocm-libraries) older bucket.""" # https://github.com/ROCm/rocm-libraries/actions/runs/18020401326?pr=1828 @@ -513,10 +513,7 @@ def test_from_workflow_run_external_repo_older(self): self.assertEqual(root.external_repo, "ROCm-rocm-libraries/") self.assertEqual(root.bucket, "therock-artifacts-external") - @unittest.skipUnless( - os.getenv("GITHUB_TOKEN"), - "GITHUB_TOKEN not set, skipping integration test", - ) + @_skip_unless_authenticated_github_api_is_available def test_from_workflow_run_external_repo_newer(self): """Test workflow run from external repo (rocm-libraries) newer bucket.""" # https://github.com/ROCm/rocm-libraries/actions/runs/19784318631 From 21dd4cd9d8b6f12572ca001b5c4da47c959233b3 Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Tue, 20 Jan 2026 12:42:25 -0800 Subject: [PATCH 17/17] Clarify docs, mention ASan artifact groups, link to specific examples --- build_tools/_therock_utils/run_outputs.py | 4 +++- docs/development/run_outputs_layout.md | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/build_tools/_therock_utils/run_outputs.py b/build_tools/_therock_utils/run_outputs.py index f269e5f8e5b..4e4fff98c5a 100644 --- a/build_tools/_therock_utils/run_outputs.py +++ b/build_tools/_therock_utils/run_outputs.py @@ -4,6 +4,8 @@ GitHub Actions workflow run. All tools that read or write run outputs should use this module to compute paths. +See the documentation at /docs/development/run_outputs_layout.md. + A "run output" is anything produced by a CI workflow run: - Build artifacts (.tar.xz, .tar.zst archives) - Logs (.log files, ninja_logs.tar.gz) @@ -35,7 +37,7 @@ - {name} = artifact name (e.g., "blas", "core-hip", "amd-llvm") - {component} = dev|lib|run|test|dbg|doc - {family} = "generic" or GPU family (gfx94X, gfx1100, etc.) -- {artifact_group} = build variant (e.g., "gfx94X-dcgpu", "gfx110X-all") +- {artifact_group} = build variant (e.g., "gfx94X-dcgpu", "gfx950-dcgpu-asan") Multi-Platform / Multi-Group Organization ----------------------------------------- diff --git a/docs/development/run_outputs_layout.md b/docs/development/run_outputs_layout.md index d9f43aa6717..6a079584028 100644 --- a/docs/development/run_outputs_layout.md +++ b/docs/development/run_outputs_layout.md @@ -81,7 +81,7 @@ Where: | `name` | Artifact name | `blas`, `core-hip`, `amd-llvm` | | `component` | Role of files contained | `dev`, `lib`, `run`, `test`, `dbg`, `doc` | | `family` | Target GPU family or `generic` | `generic`, `gfx94X`, `gfx1100` | -| `artifact_group` | Build variant identifier | `gfx94X-dcgpu`, `gfx110X-all` | +| `artifact_group` | Build variant identifier | `gfx94X-dcgpu`, `gfx950-dcgpu-asan` | ## Multi-Platform / Multi-Group Organization @@ -137,9 +137,9 @@ https://{bucket}.s3.amazonaws.com/{prefix}/... Example URLs: -- Artifact index: `https://therock-ci-artifacts.s3.amazonaws.com/12345678901-linux/index-gfx94X-dcgpu.html` -- Build logs: `https://therock-ci-artifacts.s3.amazonaws.com/12345678901-linux/logs/gfx94X-dcgpu/index.html` -- Manifest: `https://therock-ci-artifacts.s3.amazonaws.com/12345678901-linux/manifests/gfx94X-dcgpu/therock_manifest.json` +- Artifact index: https://therock-ci-artifacts-external.s3.amazonaws.com/ROCm-TheRock/21155514542-linux/index-gfx110X-all.html +- Build logs: https://therock-ci-artifacts-external.s3.amazonaws.com/ROCm-TheRock/21155514542-linux/logs/gfx110X-all/index.html +- Manifest: https://therock-ci-artifacts-external.s3.amazonaws.com/ROCm-TheRock/21155514542-linux/manifests/gfx110X-all/therock_manifest.json ## Using RunOutputRoot