diff --git a/build_tools/packaging/linux/build_package.py b/build_tools/packaging/linux/build_package.py index 4a8dc47621a..5c044fdb503 100755 --- a/build_tools/packaging/linux/build_package.py +++ b/build_tools/packaging/linux/build_package.py @@ -27,6 +27,7 @@ from datetime import datetime, timezone from email.utils import format_datetime from jinja2 import Environment, FileSystemLoader, Template +from packaging_summary import * from packaging_utils import * from pathlib import Path from runpath_to_rpath import * @@ -48,7 +49,8 @@ def create_deb_package(pkg_name, config: PackageConfig): pkg_name : Name of the package to be created config: Configuration object containing package metadata - Returns: None + Returns: + output_list: List of packages created """ print_function_name() print(f"Package Name: {pkg_name}") @@ -58,9 +60,10 @@ def create_deb_package(pkg_name, config: PackageConfig): create_nonversioned_deb_package(pkg_name, config) create_versioned_deb_package(pkg_name, config) - move_packages_to_destination(pkg_name, config) + output_list = move_packages_to_destination(pkg_name, config) # Clean debian build directory remove_dir(Path(config.dest_dir) / config.pkg_type) + return output_list def create_nonversioned_deb_package(pkg_name, config: PackageConfig): @@ -442,6 +445,34 @@ def package_with_dpkg_build(pkg_dir): ######################## RPM package creation #################### +def create_rpm_package(pkg_name, config: PackageConfig): + """Create an RPM package. + + This function invokes the creation of versioned and non-versioned packages + and moves the resulting `.rpm` files to the destination directory. + + Parameters: + pkg_name : Name of the package to be created + config: Configuration object containing package metadata + + Returns: + output_list: List of packages created + """ + print_function_name() + print(f"Package Name: {pkg_name}") + + # By default both versioned and non versioned packages are created. 
+ # In case rpath is enabled need to create only versioned package. So skipping nonversioned here + if not config.enable_rpath: + create_nonversioned_rpm_package(pkg_name, config) + + create_versioned_rpm_package(pkg_name, config) + output_list = move_packages_to_destination(pkg_name, config) + # Clean rpm build directory + remove_dir(Path(config.dest_dir) / config.pkg_type) + return output_list + + def create_nonversioned_rpm_package(pkg_name, config: PackageConfig): """Create a non-versioned RPM meta package (.rpm). @@ -487,30 +518,6 @@ def create_versioned_rpm_package(pkg_name, config: PackageConfig): package_with_rpmbuild(specfile) -def create_rpm_package(pkg_name, config: PackageConfig): - """Create an RPM package. - - This function invokes the creation of versioned and non-versioned packages - and moves the resulting `.rpm` files to the destination directory. - - Parameters: - pkg_name : Name of the package to be created - config: Configuration object containing package metadata - - Returns: None - """ - print_function_name() - print(f"Package Name: {pkg_name}") - - if not config.enable_rpath: - create_nonversioned_rpm_package(pkg_name, config) - - create_versioned_rpm_package(pkg_name, config) - move_packages_to_destination(pkg_name, config) - # Clean rpm build directory - remove_dir(Path(config.dest_dir) / config.pkg_type) - - def generate_spec_file(pkg_name, specfile, config: PackageConfig): """Generate an RPM spec file. @@ -673,20 +680,22 @@ def package_with_rpmbuild(spec_file): ######################## Begin Packaging Process################################ -def parse_input_package_list(pkg_name): +def parse_input_package_list(pkg_name, artifact_dir): """Populate the package list from the provided input arguments. 
Parameters: pkg_name : List of packages to be created + artifact_dir: The path to the Artifactory directory Returns: Package list """ print_function_name() pkg_list = [] + skipped_list = [] # If pkg_name is None, include all packages if pkg_name is None: - pkg_list = get_package_list() - return pkg_list + pkg_list, skipped_list = get_package_list(artifact_dir) + return pkg_list, skipped_list # Proceed if pkg_name is not None data = read_package_json_file() @@ -705,7 +714,7 @@ def parse_input_package_list(pkg_name): break print(f"pkg_list:\n {pkg_list}") - return pkg_list + return pkg_list, skipped_list def clean_package_build_dir(config: PackageConfig): @@ -766,18 +775,52 @@ def run(args: argparse.Namespace): # Clean the packaging build directories clean_package_build_dir(config) - pkg_list = parse_input_package_list(args.pkg_names) + pkg_list, skipped_list = parse_input_package_list( + args.pkg_names, config.artifacts_dir + ) # Create deb/rpm packages - package_creators = {"deb": create_deb_package, "rpm": create_rpm_package} - for pkg_name in pkg_list: - if config.pkg_type and config.pkg_type.lower() in package_creators: - print(f"Create {config.pkg_type.upper()} package.") - package_creators[config.pkg_type.lower()](pkg_name, config) - else: - print("Create both DEB and RPM packages.") - for creator in package_creators.values(): - creator(pkg_name, config) - clean_package_build_dir(config) + valid_types = {"deb", "rpm"} + pkg_type = (config.pkg_type or "").lower() + if pkg_type not in valid_types: + raise ValueError( + f"Invalid package type: {config.pkg_type}. Must be 'deb' or 'rpm'." 
+ ) + + try: + built_pkglist = [] + for pkg_name in pkg_list: + print(f"Create {pkg_type} package.") + if pkg_type == "rpm": + output_list = create_rpm_package(pkg_name, config) + else: + output_list = create_deb_package(pkg_name, config) + + if output_list: + built_pkglist.extend(output_list) + print(f"Built package List: {built_pkglist}") + + # Clean the build directories + clean_package_build_dir(config) + + pkglist_status = PackageList( + total=pkg_list, + built=built_pkglist, + skipped=skipped_list, + ) + + # Print build summary + print_build_summary(config, pkglist_status) + except SystemExit: + # Build aborted somewhere inside create_* functions + print("\n❌ Build aborted due to an error.\n") + pkglist_status = PackageList( + total=pkg_list, + built=built_pkglist, + skipped=skipped_list, + ) + print_build_summary(config, pkglist_status) + # Stop the program + raise def main(argv: list[str]): diff --git a/build_tools/packaging/linux/packaging_summary.py b/build_tools/packaging/linux/packaging_summary.py new file mode 100644 index 00000000000..99fe4f8f67a --- /dev/null +++ b/build_tools/packaging/linux/packaging_summary.py @@ -0,0 +1,118 @@ +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +from dataclasses import dataclass, field +from datetime import datetime, timezone +from packaging_utils import * +from typing import List + + +@dataclass +class PackageList: + # All base package names that were attempted + total: List[str] + # Packages that were successfully created (versioned + non-versioned) + built: List[str] + # Base packages that were skipped + skipped: List[str] + + +def write_build_manifest(config: PackageConfig, pkg_list: PackageList): + """Write manifest files listing built and skipped packages. 
+ + Parameters: + config: Configuration object containing package metadata + pkg_list: List of all packages attempted/built/skipped + + Returns: None + """ + print_function_name() + + # Write successful packages manifest + manifest_file = Path(config.dest_dir) / "built_packages.txt" + + total_basepkg = len(pkg_list.total) + len(pkg_list.skipped) + expected = 2 * len(pkg_list.total) + built = len(pkg_list.built) + failed = expected - built + + try: + with open(manifest_file, "w", encoding="utf-8") as f: + f.write(f"# Built Packages Manifest\n") + f.write(f"# Package Type: {config.pkg_type.upper()}\n") + f.write(f"# ROCm Version: {config.rocm_version}\n") + f.write(f"# Graphics Architecture: {config.gfx_arch}\n") + f.write( + f"# Build Date: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}\n" + ) + f.write(f"# Total base packages: {total_basepkg}\n") + f.write(f"# Skipped base packages: {len(pkg_list.skipped)}\n") + f.write( + f"# Total packages attempted (versioned + non-versioned): {expected}\n" + ) + f.write(f"# Successfully built: {built}\n") + f.write(f"# Failed to build: {failed}\n") + f.write(f"\n") + + if pkg_list.built: + f.write(f"# Created Packages:\n") + for pkg in sorted(pkg_list.built): + f.write(f"{pkg}\n") + + if pkg_list.skipped: + f.write(f"\n# Skipped Packages:\n") + f.write( + f"# Note: Package names shown are base names from package.json\n" + ) + for pkg in sorted(pkg_list.skipped): + f.write(f"{pkg}\n") + + print(f"✅ Built packages manifest written to: {manifest_file}") + except Exception as e: + print(f"⚠️ WARNING: Failed to write built packages manifest: {e}") + + +def print_build_status(config: PackageConfig, pkg_list: PackageList): + """Print a summary of the build process. 
+ + Parameters: + config: Configuration object containing package metadata + pkg_list: List of all packages attempted/built/skipped + + Returns: None + """ + print("\n" + "=" * 80) + print("BUILD SUMMARY") + print("=" * 80) + + total_basepkg = len(pkg_list.total) + len(pkg_list.skipped) + expected = 2 * len(pkg_list.total) + built = len(pkg_list.built) + failed = expected - built + + print(f"\nTotal base packages: {total_basepkg} ") + print(f"⏭️ Skipped base packages: {len(pkg_list.skipped)}") + print(f"Total packages attempted (versioned + non-versioned): {expected}") + print(f"✅ Successfully built: {built}") + print(f"❌ Failed to build: {failed}") + + print(f"\nCreated packages") + for pkg in sorted(pkg_list.built): + print(f" - {pkg}") + + if pkg_list.skipped: + print(f"\n⏭️ Skipped packages") + print(f" (Base package names from package.json)") + for pkg in sorted(pkg_list.skipped): + print(f" - {pkg}") + + print("\n" + "=" * 80) + print(f"Package type: {config.pkg_type.upper()}") + print(f"ROCm version: {config.rocm_version}") + print(f"Output directory: {config.dest_dir}") + print("=" * 80 + "\n") + + +def print_build_summary(config: PackageConfig, pkg_list: PackageList): + write_build_manifest(config, pkg_list) + print_build_status(config, pkg_list) diff --git a/build_tools/packaging/linux/packaging_utils.py b/build_tools/packaging/linux/packaging_utils.py index 6aeaed64a79..879a0ab2c33 100644 --- a/build_tools/packaging/linux/packaging_utils.py +++ b/build_tools/packaging/linux/packaging_utils.py @@ -233,20 +233,65 @@ def get_package_info(pkgname): return None -def get_package_list(): - """Read package.json and return package names. +def get_package_list(artifact_dir): + """Read package.json and return a list of package names. - Packages marked as 'Disablepackaging' will be excluded from the list + Packages marked as 'Disablepackaging' are excluded. + If the entire Artifactory directory is missing, the package is excluded + unless it is a metapackage. 
- Parameters: None + Parameters: + artifact_dir : The path to the Artifactory directory - Returns: Package list + Returns: + pkg_list : list of package names that will be packaged + skipped_list : list of package names excluded due to missing artifacts """ - + pkg_list = [] + skipped = [] data = read_package_json_file() - pkg_list = [pkg["Package"] for pkg in data if not is_packaging_disabled(pkg)] - return pkg_list + try: + dir_entries = os.listdir(artifact_dir) + except FileNotFoundError: + sys.exit(f"{artifact_dir}: Artifactory directory doesn not exist, Exiting") + + for pkg_info in data: + pkg_name = pkg_info["Package"] + # Skip disabled packages + if is_packaging_disabled(pkg_info): + continue + + # metapackages don't need artifact lookup + if is_meta_package(pkg_info): + pkg_list.append(pkg_name) + continue + + artifactory_list = pkg_info.get("Artifactory", []) + artifact_found = False + + for artifactory in artifactory_list: + artifact_name = artifactory.get("Artifact") + if not artifact_name: + continue + + # Look for directories starting with the artifact name + for entry in dir_entries: + path = Path(artifact_dir) / entry + + if entry.startswith(artifact_name) and path.is_dir(): + artifact_found = True + break + + if artifact_found: + break + + if artifact_found: + pkg_list.append(pkg_name) + else: + skipped.append(pkg_name) + + return pkg_list, skipped def remove_dir(dir_name): @@ -378,10 +423,17 @@ def convert_to_versiondependency(dependency_list, config: PackageConfig): local_config = copy.deepcopy(config) local_config.versioned_pkg = True - pkg_list = get_package_list() + pkg_list, skipped_list = get_package_list(config.artifacts_dir) + + filtered_deps = [] + # Remove amdrocm* packages that are NOT in pkg_list + for pkg in dependency_list: + if not (pkg.startswith("amdrocm") and pkg not in pkg_list): + filtered_deps.append(pkg) + updated_depends = [ f"{update_package_name(pkg,local_config)}" if pkg in pkg_list else pkg - for pkg in dependency_list + 
for pkg in filtered_deps ] depends = ", ".join(updated_depends) return depends @@ -403,7 +455,7 @@ def append_version_suffix(dep_string, config: PackageConfig): """ print_function_name() - pkg_list = get_package_list() + pkg_list, skipped_list = get_package_list(config.artifacts_dir) updated_depends = [] dep_list = [d.strip() for d in dep_string.split(",")] @@ -438,10 +490,11 @@ def move_packages_to_destination(pkg_name, config: PackageConfig): pkg_name : Package name config: Configuration object containing package metadata - Returns: None + Returns: + output_packages : list of package names moved to the destination folder """ print_function_name() - + output_packages = [] # Create destination dir to move the packages created os.makedirs(config.dest_dir, exist_ok=True) print(f"Package name: {pkg_name}") @@ -466,6 +519,9 @@ def move_packages_to_destination(pkg_name, config: PackageConfig): dest_file.unlink() shutil.move(str(file_path), str(config.dest_dir)) + output_packages.append(file_name) + + return output_packages def filter_components_fromartifactory(pkg_name, artifacts_dir, gfx_arch): @@ -517,17 +573,24 @@ def filter_components_fromartifactory(pkg_name, artifacts_dir, gfx_arch): / f"{artifact_prefix}_{component}_{artifact_suffix}" ) filename = source_dir / "artifact_manifest.txt" - with open(filename, "r", encoding="utf-8") as file: - for line in file: - - match_found = ( - isinstance(artifact_subdir, str) - and (artifact_subdir.lower() + "/") in line.lower() - ) - - if match_found and line.strip(): - print("Matching line:", line.strip()) - source_path = source_dir / line.strip() - sourcedir_list.append(source_path) + if not filename.exists(): + print(f"{pkg_name} : Missing {filename}") + continue + try: + with filename.open("r", encoding="utf-8") as file: + for line in file: + + match_found = ( + isinstance(artifact_subdir, str) + and (artifact_subdir.lower() + "/") in line.lower() + ) + + if match_found and line.strip(): + print("Matching line:", 
line.strip()) + source_path = source_dir / line.strip() + sourcedir_list.append(source_path) + except OSError as e: + print(f"Could not read manifest {filename}: {e}") + continue return sourcedir_list diff --git a/build_tools/packaging/linux/upload_package_repo.py b/build_tools/packaging/linux/upload_package_repo.py index 2948f1d19fe..08c36ac8fc1 100644 --- a/build_tools/packaging/linux/upload_package_repo.py +++ b/build_tools/packaging/linux/upload_package_repo.py @@ -83,327 +83,420 @@ def generate_indexes_recursive(root): generate_index_html(d) -def regenerate_repo_metadata_from_s3( - s3, bucket, prefix, pkg_type, uploaded_packages, job_type="nightly" -): - """Regenerate repository metadata efficiently using merge approach. +def regenerate_rpm_metadata_from_s3(s3, bucket, prefix, uploaded_packages): + """Regenerate RPM repository metadata using merge approach. - This uses mergerepo_c (RPM) or merges Packages files (DEB) to efficiently - update metadata without re-downloading all packages from S3. + Downloads existing repodata from S3, generates metadata for new packages, + merges them using mergerepo_c, and uploads the result back to S3. 
Args: s3: boto3 S3 client bucket: S3 bucket name prefix: S3 prefix (e.g., 'rpm/20251222-12345') - pkg_type: Package type ('rpm' or 'deb') - uploaded_packages: List of actually uploaded package file paths (avoids duplicates from deduplication) - job_type: Job type for Release file metadata (default: 'nightly') + uploaded_packages: List of actually uploaded .rpm file paths """ import tempfile - print(f"Updating {pkg_type.upper()} repository metadata (merge mode)...") + print(f"Updating RPM repository metadata (merge mode)...") # Create temporary directory for metadata operations with tempfile.TemporaryDirectory() as temp_dir: temp_path = Path(temp_dir) - if pkg_type == "rpm": - # Efficient approach: Download existing repodata and merge with new packages - old_repo_dir = temp_path / "old_repo" - new_repo_dir = temp_path / "new_repo" - merged_repo_dir = temp_path / "merged_repo" + # Efficient approach: Download existing repodata and merge with new packages + old_repo_dir = temp_path / "old_repo" + new_repo_dir = temp_path / "new_repo" + merged_repo_dir = temp_path / "merged_repo" - old_repo_dir.mkdir(parents=True, exist_ok=True) - new_repo_dir.mkdir(parents=True, exist_ok=True) - merged_repo_dir.mkdir(parents=True, exist_ok=True) + old_repo_dir.mkdir(parents=True, exist_ok=True) + new_repo_dir.mkdir(parents=True, exist_ok=True) + merged_repo_dir.mkdir(parents=True, exist_ok=True) - # Step 1: Download existing repodata from S3 (small files) - old_repodata_dir = old_repo_dir / "repodata" - old_repodata_dir.mkdir(parents=True, exist_ok=True) + # Step 1: Download existing repodata from S3 (small files) + old_repodata_dir = old_repo_dir / "repodata" + old_repodata_dir.mkdir(parents=True, exist_ok=True) - print( - f"Downloading existing repository metadata from S3: s3://{bucket}/{prefix}/x86_64/repodata/" - ) - repodata_files = [] - try: - paginator = s3.get_paginator("list_objects_v2") - for page in paginator.paginate( - Bucket=bucket, Prefix=f"{prefix}/x86_64/repodata/" - 
): - if "Contents" not in page: - continue - for obj in page["Contents"]: - key = obj["Key"] - filename = Path(key).name - local_file = old_repodata_dir / filename - s3.download_file(bucket, key, str(local_file)) - repodata_files.append(filename) - print(f" Downloaded: {filename}") - if repodata_files: - print( - f"✅ Found {len(repodata_files)} existing metadata files to merge" - ) - else: - print("No existing metadata files found") - except Exception as e: - print(f"⚠️ No existing repodata found (new repo?): {e}") - - # Step 2: Generate repodata for NEW packages only (actually uploaded ones) - rpm_packages = [p for p in uploaded_packages if p.endswith(".rpm")] - if rpm_packages: + print( + f"Downloading existing repository metadata from S3: s3://{bucket}/{prefix}/x86_64/repodata/" + ) + repodata_files = [] + try: + paginator = s3.get_paginator("list_objects_v2") + for page in paginator.paginate( + Bucket=bucket, Prefix=f"{prefix}/x86_64/repodata/" + ): + if "Contents" not in page: + continue + for obj in page["Contents"]: + key = obj["Key"] + filename = Path(key).name + local_file = old_repodata_dir / filename + s3.download_file(bucket, key, str(local_file)) + repodata_files.append(filename) + print(f" Downloaded: {filename}") + if repodata_files: print( - f"Generating metadata for {len(rpm_packages)} uploaded RPM packages..." - ) - # Copy uploaded RPMs to temp dir - new_arch_dir = new_repo_dir / "x86_64" - new_arch_dir.mkdir(parents=True, exist_ok=True) - for rpm_file in rpm_packages: - shutil.copy2(rpm_file, new_arch_dir / Path(rpm_file).name) - - # Generate repodata for new packages with clean paths (no baseurl) - run_command( - "createrepo_c --no-database --simple-md-filenames .", - cwd=str(new_arch_dir), + f"✅ Found {len(repodata_files)} existing metadata files to merge" ) - print("✅ Generated metadata for uploaded packages") else: - print("No new RPM packages uploaded (all deduplicated)") - # Still need to ensure old metadata is preserved! 
- if repodata_files: - print("Preserving existing repodata...") - # Just re-upload the existing repodata we downloaded - for metadata_file in old_repodata_dir.iterdir(): - if metadata_file.is_file(): - s3_key = f"{prefix}/x86_64/repodata/{metadata_file.name}" - s3.upload_file(str(metadata_file), bucket, s3_key) - print(f" Uploaded: {metadata_file.name}") - print("✅ RPM repository metadata preserved") - return - - # Step 3: Merge repositories using mergerepo_c (no need to download all RPMs!) - merged_arch_dir = merged_repo_dir / "x86_64" - merged_arch_dir.mkdir(parents=True, exist_ok=True) - - if repodata_files: # If we have existing metadata - print("Merging old and new repository metadata...") - # mergerepo_c merges repodata without needing actual RPM files! - # Use --no-database, --simple-md-filenames, and --omit-baseurl to ensure clean paths - run_command( - f"mergerepo_c --no-database --simple-md-filenames --omit-baseurl " - f'--repo "{old_repo_dir}" --repo "{new_repo_dir / "x86_64"}" ' - f'--outputdir "{merged_arch_dir}"', - cwd=str(temp_path), - ) - print("✅ Merged repository metadata") - else: # First upload, no existing metadata - print("First upload - using new repository metadata") - shutil.copytree( - new_repo_dir / "x86_64" / "repodata", merged_arch_dir / "repodata" - ) + print("No existing metadata files found") + except Exception as e: + print(f"⚠️ No existing repodata found (new repo?): {e}") - # Step 4: Upload merged repodata to S3 - merged_repodata = merged_arch_dir / "repodata" - if merged_repodata.exists(): - print("Uploading merged repository metadata to S3...") - uploaded_metadata = [] - for metadata_file in merged_repodata.iterdir(): + # Step 2: Generate repodata for NEW packages only (actually uploaded ones) + rpm_packages = [p for p in uploaded_packages if p.endswith(".rpm")] + if rpm_packages: + print( + f"Generating metadata for {len(rpm_packages)} uploaded RPM packages..." 
+ ) + # Copy uploaded RPMs to temp dir + new_arch_dir = new_repo_dir / "x86_64" + new_arch_dir.mkdir(parents=True, exist_ok=True) + for rpm_file in rpm_packages: + shutil.copy2(rpm_file, new_arch_dir / Path(rpm_file).name) + + # Generate repodata for new packages with clean paths (no baseurl) + run_command( + "createrepo_c --no-database --simple-md-filenames .", + cwd=str(new_arch_dir), + ) + print("✅ Generated metadata for uploaded packages") + else: + print("No new RPM packages uploaded (all deduplicated)") + # Still need to ensure old metadata is preserved! + if repodata_files: + print("Preserving existing repodata...") + # Just re-upload the existing repodata we downloaded + for metadata_file in old_repodata_dir.iterdir(): if metadata_file.is_file(): s3_key = f"{prefix}/x86_64/repodata/{metadata_file.name}" s3.upload_file(str(metadata_file), bucket, s3_key) - uploaded_metadata.append(metadata_file.name) print(f" Uploaded: {metadata_file.name}") - print( - f"✅ RPM repository metadata updated: {len(uploaded_metadata)} files" - ) + print("✅ RPM repository metadata preserved") + return + + # Step 3: Merge repositories using mergerepo_c (no need to download all RPMs!) + merged_arch_dir = merged_repo_dir / "x86_64" + merged_arch_dir.mkdir(parents=True, exist_ok=True) + + if repodata_files: # If we have existing metadata + print("Merging old and new repository metadata...") + # mergerepo_c merges repodata without needing actual RPM files! 
+ # Use --no-database, --simple-md-filenames, and --omit-baseurl to ensure clean paths + run_command( + f"mergerepo_c --no-database --simple-md-filenames --omit-baseurl " + f'--repo "{old_repo_dir}" --repo "{new_repo_dir / "x86_64"}" ' + f'--outputdir "{merged_arch_dir}"', + cwd=str(temp_path), + ) + print("✅ Merged repository metadata") + else: # First upload, no existing metadata + print("First upload - using new repository metadata") + shutil.copytree( + new_repo_dir / "x86_64" / "repodata", merged_arch_dir / "repodata" + ) - elif pkg_type == "deb": - # Efficient approach: Merge existing Packages file with new packages - dists_dir = temp_path / "dists" / "stable" / "main" / "binary-amd64" - dists_dir.mkdir(parents=True, exist_ok=True) + # Step 4: Upload merged repodata to S3 + merged_repodata = merged_arch_dir / "repodata" + if merged_repodata.exists(): + print("Uploading merged repository metadata to S3...") + uploaded_metadata = [] + for metadata_file in merged_repodata.iterdir(): + if metadata_file.is_file(): + s3_key = f"{prefix}/x86_64/repodata/{metadata_file.name}" + s3.upload_file(str(metadata_file), bucket, s3_key) + uploaded_metadata.append(metadata_file.name) + print(f" Uploaded: {metadata_file.name}") + print(f"✅ RPM repository metadata updated: {len(uploaded_metadata)} files") - pool_dir = temp_path / "pool" / "main" - pool_dir.mkdir(parents=True, exist_ok=True) - # Step 1: Download existing Packages file from S3 (small file) - existing_packages = dists_dir / "Packages.old" - packages_s3_key = f"{prefix}/dists/stable/main/binary-amd64/Packages" - try: - print( - f"Downloading existing Packages file from S3: s3://{bucket}/{packages_s3_key}" - ) - s3.download_file(bucket, packages_s3_key, str(existing_packages)) - # Count existing packages - with open(existing_packages, "r") as f: - content = f.read() - pkg_count = content.count("\nPackage: ") - print(f"✅ Downloaded existing Packages file ({pkg_count} packages)") - except Exception as e: - print(f"⚠️ 
No existing Packages file found (new repo?): {e}") - existing_packages = None - - # Step 2: Generate Packages entries for NEW packages only (actually uploaded ones) - deb_packages = [p for p in uploaded_packages if p.endswith(".deb")] - if deb_packages: - print( - f"Generating Packages entries for {len(deb_packages)} uploaded DEB packages..." - ) - # Copy uploaded DEBs to temp dir - for deb_file in deb_packages: - shutil.copy2(deb_file, pool_dir / Path(deb_file).name) - - # Generate Packages entries for uploaded packages - new_packages = dists_dir / "Packages.new" - run_command( - f'dpkg-scanpackages -m pool/main /dev/null > "{new_packages}"', - cwd=str(temp_path), - ) - print("✅ Generated Packages entries for uploaded packages") - else: - print("No new DEB packages uploaded (all deduplicated)") - # Still need to ensure old metadata is preserved! - if existing_packages and existing_packages.exists(): - import datetime - - print("Preserving existing Packages file...") - shutil.copy2(existing_packages, dists_dir / "Packages") - run_command("gzip -9c Packages > Packages.gz", cwd=str(dists_dir)) - - # Generate Release file - release_dir = temp_path / "dists" / "stable" - release_dir.mkdir(parents=True, exist_ok=True) - release_file = release_dir / "Release" - - with open(release_file, "w") as f: - f.write( - f"""Origin: AMD ROCm +def generate_release_file_with_checksums(release_file, job_type, dists_dir): + """Generate a Debian Release file with MD5Sum, SHA1, and SHA256 checksums. 
+ + Args: + release_file: Path to the Release file to create + job_type: Job type for metadata (nightly/dev/release) + dists_dir: Directory containing Packages files (main/binary-amd64/) + """ + import hashlib + import datetime + + # Files to hash (relative paths from dists/stable/) + files_to_hash = [ + (dists_dir / "Packages", "main/binary-amd64/Packages"), + (dists_dir / "Packages.gz", "main/binary-amd64/Packages.gz"), + ] + + # Calculate all hashes + md5_entries = [] + sha1_entries = [] + sha256_entries = [] + + for file_path, rel_path in files_to_hash: + if not file_path.exists(): + continue + + # Get file size + file_size = file_path.stat().st_size + + # Calculate hashes + md5_hash = hashlib.md5() + sha1_hash = hashlib.sha1() + sha256_hash = hashlib.sha256() + + with open(file_path, "rb") as f: + while True: + data = f.read(65536) # Read in 64KB chunks + if not data: + break + md5_hash.update(data) + sha1_hash.update(data) + sha256_hash.update(data) + + # Store entries (space-aligned format) + md5_entries.append(f" {md5_hash.hexdigest()} {file_size:16d} {rel_path}") + sha1_entries.append(f" {sha1_hash.hexdigest()} {file_size:16d} {rel_path}") + sha256_entries.append(f" {sha256_hash.hexdigest()} {file_size:16d} {rel_path}") + + # Write Release file + with open(release_file, "w") as f: + # Header fields + f.write( + f"""Origin: AMD ROCm Label: ROCm {job_type} Packages Suite: stable Codename: stable Architectures: amd64 Components: main +Description: ROCm APT Repository Date: {datetime.datetime.utcnow():%a, %d %b %Y %H:%M:%S UTC} """ - ) + ) + + # MD5Sum section + if md5_entries: + f.write("MD5Sum:\n") + f.write("\n".join(md5_entries)) + f.write("\n") + + # SHA1 section + if sha1_entries: + f.write("SHA1:\n") + f.write("\n".join(sha1_entries)) + f.write("\n") + + # SHA256 section + if sha256_entries: + f.write("SHA256:\n") + f.write("\n".join(sha256_entries)) + f.write("\n") + + print(f"✅ Release file generated with checksums: MD5, SHA1, SHA256") + + +def 
upload_deb_metadata_to_s3(s3, bucket, prefix, dists_dir, release_file): + """Helper function to upload Debian metadata files to S3. + + Args: + s3: boto3 S3 client + bucket: S3 bucket name + prefix: S3 prefix + dists_dir: Directory containing Packages files + release_file: Path to Release file + """ + packages_file = dists_dir / "Packages" + packages_gz = dists_dir / "Packages.gz" + + uploaded_count = 0 + if packages_file.exists(): + s3_key = f"{prefix}/dists/stable/main/binary-amd64/Packages" + s3.upload_file(str(packages_file), bucket, s3_key) + print(f" Uploaded: Packages") + uploaded_count += 1 + + if packages_gz.exists(): + s3_key = f"{prefix}/dists/stable/main/binary-amd64/Packages.gz" + s3.upload_file(str(packages_gz), bucket, s3_key) + print(f" Uploaded: Packages.gz") + uploaded_count += 1 - packages_file = dists_dir / "Packages" - packages_gz = dists_dir / "Packages.gz" + if release_file.exists(): + s3_key = f"{prefix}/dists/stable/Release" + s3.upload_file(str(release_file), bucket, s3_key) + print(f" Uploaded: Release") + uploaded_count += 1 - if packages_file.exists(): - s3_key = f"{prefix}/dists/stable/main/binary-amd64/Packages" - s3.upload_file(str(packages_file), bucket, s3_key) - print(f" Uploaded: Packages") + print(f"✅ DEB repository metadata updated: {uploaded_count} files") - if packages_gz.exists(): - s3_key = f"{prefix}/dists/stable/main/binary-amd64/Packages.gz" - s3.upload_file(str(packages_gz), bucket, s3_key) - print(f" Uploaded: Packages.gz") - if release_file.exists(): - s3_key = f"{prefix}/dists/stable/Release" - s3.upload_file(str(release_file), bucket, s3_key) - print(f" Uploaded: Release") - return +def regenerate_deb_metadata_from_s3( + s3, bucket, prefix, uploaded_packages, job_type="nightly" +): + """Regenerate Debian repository metadata efficiently with proper checksums. + + Uses dpkg-scanpackages for efficiency (no package downloads), but generates + proper Release file with MD5Sum, SHA1, and SHA256 checksums. 
+ + Args: + s3: boto3 S3 client + bucket: S3 bucket name + prefix: S3 prefix (e.g., 'deb/20251222-12345') + uploaded_packages: List of local paths of .deb packages that were successfully uploaded to S3 + job_type: Job type for Release file metadata (default: 'nightly') + """ + import tempfile + + print(f"Updating DEB repository metadata (merge mode with checksums)...") + + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Setup directories + dists_dir = temp_path / "dists" / "stable" / "main" / "binary-amd64" + dists_dir.mkdir(parents=True, exist_ok=True) - # Step 3: Merge old and new Packages files (with deduplication by filename) - merged_packages = dists_dir / "Packages" + pool_dir = temp_path / "pool" / "main" + pool_dir.mkdir(parents=True, exist_ok=True) + # Step 1: Download existing Packages file from S3 (SMALL FILE - efficient!) + existing_packages = dists_dir / "Packages.old" + packages_s3_key = f"{prefix}/dists/stable/main/binary-amd64/Packages" + try: + print( + f"Downloading existing Packages file from S3: s3://{bucket}/{packages_s3_key}" + ) + s3.download_file(bucket, packages_s3_key, str(existing_packages)) + with open(existing_packages, "r") as f: + content = f.read() + pkg_count = content.count("\nPackage: ") + print( + f"✅ Downloaded existing Packages file containing ({pkg_count} packages)" + ) + except Exception as e: + print(f"⚠️ No existing Packages file found (new repo?): {e}") + existing_packages = None + + # Step 2: Generate Packages entries for NEW packages only + deb_packages = [p for p in uploaded_packages if p.endswith(".deb")] + if deb_packages: + print( + f"Generating Packages entries for {len(deb_packages)} uploaded DEB packages..." 
+ ) + # Copy uploaded DEBs to temp dir + for deb_file in deb_packages: + shutil.copy2(deb_file, pool_dir / Path(deb_file).name) + + # Generate Packages entries for uploaded packages + new_packages = dists_dir / "Packages.new" + run_command( + f'dpkg-scanpackages -m pool/main /dev/null > "{new_packages}"', + cwd=str(temp_path), + ) + print("✅ Generated Packages entries for uploaded packages") + else: + print("No new DEB packages uploaded (all deduplicated)") if existing_packages and existing_packages.exists(): - print("Merging old and new Packages files...") - - def parse_packages_file(filepath): - """Parse Packages file into dict keyed by Filename""" - packages = {} - with open(filepath, "r") as f: - current_entry = [] - current_filename = None - - for line in f: - if line.strip() == "": # Blank line = end of entry - if current_entry and current_filename: - packages[current_filename] = ( - "\n".join(current_entry) + "\n" - ) - current_entry = [] - current_filename = None - else: - current_entry.append(line.rstrip()) - if line.startswith("Filename:"): - current_filename = line.split(":", 1)[1].strip() - - # Handle last entry (no trailing blank line) - if current_entry and current_filename: - packages[current_filename] = "\n".join(current_entry) + "\n" - - return packages - - # Parse both files - old_packages = parse_packages_file(existing_packages) - new_packages_dict = parse_packages_file(new_packages) - - print(f" Old metadata: {len(old_packages)} packages") - print(f" New metadata: {len(new_packages_dict)} packages") - - # Merge: new packages override old ones with same filename - merged = old_packages.copy() - merged.update(new_packages_dict) # New overwrites old - - # Write merged Packages file - with open(merged_packages, "w") as outfile: - for filename in sorted(merged.keys()): - outfile.write(merged[filename]) - outfile.write("\n") # Blank line separator - - print(f"✅ Merged Packages files: {len(merged)} total packages") - else: # First upload, no existing 
Packages file - print("First upload - using new Packages file") - shutil.copy2(new_packages, merged_packages) - - # Compress Packages file - run_command("gzip -9c Packages > Packages.gz", cwd=str(dists_dir)) - - # Step 4: Generate Release file - import datetime - - release_dir = temp_path / "dists" / "stable" - release_dir.mkdir(parents=True, exist_ok=True) - release_file = release_dir / "Release" - - with open(release_file, "w") as f: - f.write( - f"""Origin: AMD ROCm -Label: ROCm {job_type} Packages -Suite: stable -Codename: stable -Architectures: amd64 -Components: main -Date: {datetime.datetime.utcnow():%a, %d %b %Y %H:%M:%S UTC} -""" - ) + print("Preserving existing metadata...") + shutil.copy2(existing_packages, dists_dir / "Packages") + run_command("gzip -9c Packages > Packages.gz", cwd=str(dists_dir)) + + # Generate Release file with checksums + release_dir = temp_path / "dists" / "stable" + release_dir.mkdir(parents=True, exist_ok=True) + release_file = release_dir / "Release" + + generate_release_file_with_checksums(release_file, job_type, dists_dir) + + # Upload preserved files + upload_deb_metadata_to_s3(s3, bucket, prefix, dists_dir, release_file) + return + + # Step 3: Merge old and new Packages files + merged_packages = dists_dir / "Packages" + + if existing_packages and existing_packages.exists(): + print("Merging old and new Packages files...") + + def parse_packages_file(filepath): + """Parse Packages file into dict keyed by Filename""" + packages = {} + with open(filepath, "r") as f: + current_entry = [] + current_filename = None + + for line in f: + if line.strip() == "": + if current_entry and current_filename: + packages[current_filename] = ( + "\n".join(current_entry) + "\n" + ) + current_entry = [] + current_filename = None + else: + current_entry.append(line.rstrip()) + if line.startswith("Filename:"): + current_filename = line.split(":", 1)[1].strip() + + if current_entry and current_filename: + packages[current_filename] = 
"\n".join(current_entry) + "\n" + + return packages + + old_packages = parse_packages_file(existing_packages) + new_packages_dict = parse_packages_file(new_packages) + + print(f" Old metadata: {len(old_packages)} packages") + print(f" New metadata: {len(new_packages_dict)} packages") + + merged = old_packages.copy() + merged.update(new_packages_dict) + + with open(merged_packages, "w") as outfile: + for filename in sorted(merged.keys()): + outfile.write(merged[filename]) + outfile.write("\n") + + print(f"✅ Merged Packages files: {len(merged)} total packages") + else: + print("First upload - using new Packages file") + shutil.copy2(new_packages, merged_packages) - # Step 5: Upload merged Packages files and Release to S3 - packages_file = dists_dir / "Packages" - packages_gz = dists_dir / "Packages.gz" + # Compress Packages file + run_command("gzip -9c Packages > Packages.gz", cwd=str(dists_dir)) - uploaded_count = 0 - if packages_file.exists(): - s3_key = f"{prefix}/dists/stable/main/binary-amd64/Packages" - s3.upload_file(str(packages_file), bucket, s3_key) - print(f" Uploaded: Packages to s3://{bucket}/{s3_key}") - uploaded_count += 1 + # Step 4: Generate Release file with checksums + release_dir = temp_path / "dists" / "stable" + release_dir.mkdir(parents=True, exist_ok=True) + release_file = release_dir / "Release" - if packages_gz.exists(): - s3_key = f"{prefix}/dists/stable/main/binary-amd64/Packages.gz" - s3.upload_file(str(packages_gz), bucket, s3_key) - print(f" Uploaded: Packages.gz to s3://{bucket}/{s3_key}") - uploaded_count += 1 + generate_release_file_with_checksums(release_file, job_type, dists_dir) - if release_file.exists(): - s3_key = f"{prefix}/dists/stable/Release" - s3.upload_file(str(release_file), bucket, s3_key) - print(f" Uploaded: Release to s3://{bucket}/{s3_key}") - uploaded_count += 1 + # Step 5: Upload merged files to S3 + upload_deb_metadata_to_s3(s3, bucket, prefix, dists_dir, release_file) - print(f"✅ DEB repository metadata updated: 
{uploaded_count} files") + + +def regenerate_repo_metadata_from_s3( + s3, bucket, prefix, pkg_type, uploaded_packages, job_type="nightly" +): + """Regenerate repository metadata efficiently using merge approach. + + This uses mergerepo_c (RPM) or merges Packages files (DEB) to efficiently + update metadata without re-downloading all packages from S3. + + Args: + s3: boto3 S3 client + bucket: S3 bucket name + prefix: S3 prefix (e.g., 'rpm/20251222-12345') + pkg_type: Package type ('rpm' or 'deb') + uploaded_packages: List of actually uploaded package file paths (avoids duplicates from deduplication) + job_type: Job type for Release file metadata (default: 'nightly') + """ + if pkg_type == "rpm": + regenerate_rpm_metadata_from_s3(s3, bucket, prefix, uploaded_packages) + elif pkg_type == "deb": + regenerate_deb_metadata_from_s3(s3, bucket, prefix, uploaded_packages, job_type) + else: + raise ValueError(f"Unsupported package type: {pkg_type}") def generate_top_index_from_s3(s3, bucket, prefix): @@ -702,6 +795,11 @@ def upload_to_s3(source_dir, bucket, prefix, dedupe=False): if fname == "index.html": continue + # Skip build manifest files - these are for local tracking only + if fname.lower().endswith(".txt"): + print(f"Skipping build manifest file (local only): {fname}") + continue + local = os.path.join(root, fname) rel = os.path.relpath(local, source_dir) key = os.path.join(prefix, rel).replace("\\", "/")