diff --git a/docs/source/reference/cli.rst b/docs/source/reference/cli.rst
index 6be2798fa3..76bec2fabf 100644
--- a/docs/source/reference/cli.rst
+++ b/docs/source/reference/cli.rst
@@ -170,6 +170,17 @@ Run benchmarking using llm-eval.
    :prog: olive
    :path: benchmark
 
+Generate Model Package
+======================
+
+Merge multiple model outputs into a model package with a manifest and per-component metadata.
+
+.. argparse::
+   :module: olive.cli.launcher
+   :func: get_cli_parser
+   :prog: olive
+   :path: generate-model-package
+
 Providing Input Models
 ======================
 
diff --git a/olive/cache.py b/olive/cache.py
index fe351057b9..22b13eae5b 100644
--- a/olive/cache.py
+++ b/olive/cache.py
@@ -384,10 +384,10 @@ def save_model(
     ):
         """Save a model from the cache to a given path."""
         output_dir = Path(output_dir) if output_dir else Path.cwd()
-
-        # If output_dir has a suffix (like .onnx), it's a file path
-        # Use parent directory for saving files
-        actual_output_dir = output_dir.parent if output_dir.suffix else output_dir
+        if output_dir.suffix and not output_dir.is_dir():
+            actual_output_dir = output_dir.parent
+        else:
+            actual_output_dir = output_dir
 
         actual_output_dir.mkdir(parents=True, exist_ok=True)
         model_json = self.load_model(model_id)
diff --git a/olive/cli/launcher.py b/olive/cli/launcher.py
index fed339f87d..55e6ffdeb4 100644
--- a/olive/cli/launcher.py
+++ b/olive/cli/launcher.py
@@ -17,6 +17,7 @@
 from olive.cli.generate_adapter import GenerateAdapterCommand
 from olive.cli.generate_cost_model import GenerateCostModelCommand
 from olive.cli.init import InitCommand
+from olive.cli.model_package import ModelPackageCommand
 from olive.cli.optimize import OptimizeCommand
 from olive.cli.quantize import QuantizeCommand
 from olive.cli.run import WorkflowRunCommand
@@ -54,6 +55,7 @@ def get_cli_parser(called_as_console_script: bool = True) -> ArgumentParser:
     ConfigureQualcommSDKCommand.register_subcommand(commands_parser)
     SharedCacheCommand.register_subcommand(commands_parser)
     ExtractAdaptersCommand.register_subcommand(commands_parser)
+    ModelPackageCommand.register_subcommand(commands_parser)
     BenchmarkCommand.register_subcommand(commands_parser)
 
     return parser
diff --git a/olive/cli/model_package.py b/olive/cli/model_package.py
new file mode 100644
index 0000000000..c88223d746
--- /dev/null
+++ b/olive/cli/model_package.py
@@ -0,0 +1,448 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import json
+import logging
+import shutil
+from argparse import ArgumentParser
+from pathlib import Path
+from typing import Optional
+
+from olive.cli.base import (
+    BaseOliveCLICommand,
+    add_logging_options,
+    add_telemetry_options,
+)
+from olive.telemetry import action
+
+logger = logging.getLogger(__name__)
+
+# Model file suffixes that belong in the models/ directory, not configs/
+_MODEL_SUFFIXES = {".onnx", ".bin", ".data", ".xml"}
+
+
+class ModelPackageCommand(BaseOliveCLICommand):
+    """Merge multiple Olive output directories into a model package with a manifest."""
+
+    @staticmethod
+    def register_subcommand(parser: ArgumentParser):
+        sub_parser = parser.add_parser(
+            "generate-model-package",
+            help="Merge multiple model outputs into a model package with a manifest",
+        )
+
+        sub_parser.add_argument(
+            "-s",
+            "--source",
+            type=str,
+            action="append",
+            required=True,
+            help="Source Olive output directory. Can be specified multiple times.",
+        )
+
+        sub_parser.add_argument(
+            "-o",
+            "--output_path",
+            type=str,
+            required=True,
+            help="Output directory for the merged model package.",
+        )
+
+        sub_parser.add_argument(
+            "--model_name",
+            type=str,
+            default=None,
+            help="Model name for the manifest. If not set, derived from the output directory name.",
+        )
+
+        sub_parser.add_argument(
+            "--model_version",
+            type=str,
+            default="1.0",
+            help="Model version string for the manifest. Default: 1.0",
+        )
+
+        add_logging_options(sub_parser)
+        add_telemetry_options(sub_parser)
+        sub_parser.set_defaults(func=ModelPackageCommand)
+
+    @action
+    def run(self):
+        sources = self._parse_sources()
+        output_dir = Path(self.args.output_path)
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        model_name = self.args.model_name or output_dir.name
+        model_version = self.args.model_version
+
+        # Read model configs from each source
+        targets = []
+        for target_name, source_path in sources:
+            model_config = self._read_model_config(source_path)
+            targets.append((target_name, source_path, model_config))
+
+        is_composite = targets[0][2].get("type") == "CompositeModel"
+        if is_composite:
+            self._package_composite(targets, output_dir, model_name, model_version)
+        else:
+            self._package_single(targets, output_dir, model_name, model_version)
+
+        logger.info("Model package generated at %s", output_dir)
+        # ruff: noqa: T201
+        print(f"Model package generated at {output_dir}")
+
+    # ------------------------------------------------------------------
+    # Single-component packaging
+    # ------------------------------------------------------------------
+
+    def _package_single(
+        self,
+        targets: list[tuple[str, Path, dict]],
+        output_dir: Path,
+        model_name: str,
+        model_version: str,
+    ) -> None:
+        """Package non-composite models (single ONNX per target)."""
+        config_file_names = self._copy_config_files(targets, output_dir)
+        task = self._extract_task(targets)
+        component_name = _task_to_component_name(task)
+
+        component_dir = output_dir / "models" / component_name
+        component_dir.mkdir(parents=True, exist_ok=True)
+
+        model_variants = {}
+        for target_name, _source_path, model_config in targets:
+            attrs = _get_model_attributes(model_config)
+            model_path = Path(model_config["config"]["model_path"])
+
+            target_dir = component_dir / target_name
+            _copy_model_files_single(model_path, target_dir)
+
+            constraints = _build_constraints(attrs, model_path)
+            model_variants[target_name] = {"file": model_path.name, "constraints": constraints}
+
+        _remove_config_files(component_dir, config_file_names)
+
+        metadata = {"name": component_name, "model_variants": model_variants}
+        _write_json(component_dir / "metadata.json", metadata)
+
+        manifest = {
+            "name": model_name,
+            "model_version": model_version,
+            "task": task,
+            "component_models": [component_name],
+        }
+        _write_json(output_dir / "manifest.json", manifest)
+
+    # ------------------------------------------------------------------
+    # Composite-model packaging
+    # ------------------------------------------------------------------
+
+    def _package_composite(
+        self,
+        targets: list[tuple[str, Path, dict]],
+        output_dir: Path,
+        model_name: str,
+        model_version: str,
+    ) -> None:
+        """Package composite models with per-component directory layout."""
+        config_file_names = self._copy_config_files(targets, output_dir)
+
+        # Collect component info: component_data[comp_name][target_name] = (comp_config, target_attrs)
+        from collections import OrderedDict
+
+        component_data: dict[str, dict] = OrderedDict()
+
+        for target_name, _source_path, model_config in targets:
+            target_attrs = _get_model_attributes(model_config)
+            components = model_config["config"].get("model_components", [])
+            component_names = model_config["config"].get("component_names", [])
+
+            for comp_config, comp_name in zip(components, component_names):
+                if comp_name not in component_data:
+                    component_data[comp_name] = OrderedDict()
+                component_data[comp_name][target_name] = (comp_config, target_attrs)
+
+        models_dir = output_dir / "models"
+        comp_names_list = list(component_data.keys())
+
+        for comp_name in comp_names_list:
+            comp_dir = models_dir / comp_name
+            comp_dir.mkdir(parents=True, exist_ok=True)
+
+            model_variants = {}
+            for target_name, (comp_config, target_attrs) in component_data[comp_name].items():
+                comp_model_path = Path(comp_config["config"]["model_path"])
+                target_dir = comp_dir / target_name
+                _copy_component_files(comp_model_path, target_dir)
+
+                constraints = _build_constraints(target_attrs, comp_model_path)
+                model_variants[target_name] = {"file": comp_model_path.name, "constraints": constraints}
+
+            _remove_config_files(comp_dir, config_file_names)
+
+            metadata = {"name": comp_name, "model_variants": model_variants}
+            _write_json(comp_dir / "metadata.json", metadata)
+
+        task = self._extract_task(targets)
+        manifest = {
+            "name": model_name,
+            "model_version": model_version,
+            "task": task,
+            "component_models": comp_names_list,
+        }
+        _write_json(output_dir / "manifest.json", manifest)
+
+    # ------------------------------------------------------------------
+    # Config file handling
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _copy_config_files(
+        targets: list[tuple[str, Path, dict]],
+        output_dir: Path,
+    ) -> set[str]:
+        """Copy non-model config files (genai_config, tokenizer, etc.) to configs/."""
+        config_entries: dict[str, Path] = {}
+
+        # Collect from the first target's additional_files or source directory
+        for _target_name, _source_path, model_config in targets:
+            attrs = _get_model_attributes(model_config)
+            for fp in attrs.get("additional_files", []):
+                p = Path(fp)
+                if (p.is_file() or p.is_dir()) and p.name not in config_entries:
+                    config_entries[p.name] = p
+            if config_entries:
+                break
+
+        # Fall back to scanning the source directory for non-model files
+        if not config_entries:
+            for _target_name, source_path, _model_config in targets:
+                for f in sorted(source_path.iterdir()):
+                    if f.name == "model_config.json":
+                        continue
+                    if (f.is_file() and f.suffix not in _MODEL_SUFFIXES) or f.is_dir():
+                        config_entries[f.name] = f
+                if config_entries:
+                    break
+
+        if not config_entries:
+            return set()
+
+        configs_dir = output_dir / "configs"
+        configs_dir.mkdir(parents=True, exist_ok=True)
+
+        for name, src_path in config_entries.items():
+            dest = configs_dir / name
+            if src_path.is_dir():
+                if not dest.exists():
+                    shutil.copytree(str(src_path), str(dest))
+            else:
+                shutil.copy2(str(src_path), str(dest))
+            logger.info("Copied %s to %s", name, configs_dir)
+
+        return set(config_entries.keys())
+
+    # ------------------------------------------------------------------
+    # Source validation and reading
+    # ------------------------------------------------------------------
+
+    def _parse_sources(self) -> list[tuple[str, Path]]:
+        sources = []
+        for source in self.args.source:
+            path = Path(source)
+            if not path.is_dir():
+                raise ValueError(f"Source path does not exist or is not a directory: {path}")
+
+            if not (path / "model_config.json").exists():
+                raise ValueError(
+                    f"No model_config.json found in {path}. "
+                    "Source must be an Olive output directory with model_config.json."
+                )
+
+            sources.append((path.name, path))
+
+        if len(sources) < 2:
+            raise ValueError("At least two --source directories are required to merge.")
+
+        return sources
+
+    @staticmethod
+    def _read_model_config(source_path: Path) -> dict:
+        config_path = source_path / "model_config.json"
+        with open(config_path) as f:
+            return json.load(f)
+
+    @staticmethod
+    def _extract_accelerator_info(target_models: list[dict]) -> tuple[str, str]:
+        for model_config in target_models:
+            attrs = model_config.get("config", {}).get("model_attributes") or {}
+            ep = attrs.get("ep", "CPUExecutionProvider")
+            device = attrs.get("device", "cpu")
+            return ep, device.lower()
+        return "CPUExecutionProvider", "cpu"
+
+    # ------------------------------------------------------------------
+    # Task extraction
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _extract_task(targets: list[tuple[str, Path, dict]]) -> str:
+        """Extract the HuggingFace pipeline task for the model."""
+        model_name_or_path = ""
+        for _target_name, _source_path, model_config in targets:
+            attrs = _get_model_attributes(model_config)
+            model_name_or_path = attrs.get("_name_or_path", "")
+            if model_name_or_path:
+                break
+
+        if not model_name_or_path:
+            return ""
+
+        try:
+            from huggingface_hub import model_info
+
+            info = model_info(model_name_or_path)
+            tag = info.pipeline_tag or ""
+            return tag.replace("-", "_")
+        except Exception:
+            logger.debug("Could not fetch task from HuggingFace Hub for %s", model_name_or_path, exc_info=True)
+            return ""
+
+
+# ------------------------------------------------------------------
+# Module-level helpers
+# ------------------------------------------------------------------
+
+
+def _get_model_attributes(model_config: dict) -> dict:
+    return model_config.get("config", {}).get("model_attributes") or {}
+
+
+def _write_json(path: Path, data: dict) -> None:
+    with open(path, "w") as f:
+        json.dump(data, f, indent=2)
+    logger.info("Generated %s", path)
+
+
+def _build_constraints(attrs: dict, model_path: Path) -> dict:
+    """Build variant constraints from model attributes and ONNX metadata."""
+    constraints = {}
+    ep = attrs.get("ep")
+    if ep:
+        constraints["ep"] = ep
+    device = attrs.get("device")
+    if device:
+        constraints["device"] = device
+    ep_compat = _extract_ep_compatibility_from_onnx(model_path, ep or "")
+    constraints["ep_compatibility_info"] = ep_compat or ""
+    return constraints
+
+
+def _extract_ep_compatibility_from_onnx(model_path: Path, ep: str = "") -> Optional[str]:
+    """Extract ep_compatibility_info from ONNX model custom metadata."""
+    if not model_path.is_file():
+        return None
+
+    try:
+        import onnx
+
+        onnx_model = onnx.load(str(model_path), load_external_data=False)
+        prefix = "ep_compatibility_info."
+        ep_compat_map = {
+            entry.key[len(prefix) :]: entry.value for entry in onnx_model.metadata_props if entry.key.startswith(prefix)
+        }
+    except Exception:
+        logger.debug("Could not read ONNX metadata from %s", model_path, exc_info=True)
+        return None
+
+    if not ep_compat_map:
+        return None
+    if ep and ep in ep_compat_map:
+        return ep_compat_map[ep]
+    if len(ep_compat_map) == 1:
+        return next(iter(ep_compat_map.values()))
+    return None
+
+
+def _copy_model_files_single(model_path: Path, dest_dir: Path) -> None:
+    """Copy model files for a single ONNX model into dest_dir."""
+    if dest_dir.exists():
+        return
+
+    src_dir = model_path.parent if model_path.is_file() else model_path
+    if src_dir.is_dir():
+        shutil.copytree(str(src_dir), str(dest_dir))
+    else:
+        dest_dir.mkdir(parents=True, exist_ok=True)
+        shutil.copy2(str(model_path), str(dest_dir))
+
+
+def _copy_component_files(model_path: Path, dest_dir: Path) -> None:
+    """Copy files for a single ONNX component to dest_dir.
+
+    Copies the .onnx file and its associated context binary (.bin) files
+    and external data files.
+    """
+    if dest_dir.exists():
+        return
+
+    dest_dir.mkdir(parents=True, exist_ok=True)
+    src_dir = model_path.parent
+
+    # Copy the ONNX file itself
+    shutil.copy2(str(model_path), str(dest_dir / model_path.name))
+
+    # Find associated files
+    associated_files: set[str] = set()
+    try:
+        from olive.passes.onnx.common import get_context_bin_file_names
+
+        associated_files.update(get_context_bin_file_names(str(model_path)))
+    except Exception:
+        logger.debug("Could not read context binary file names from %s", model_path, exc_info=True)
+
+    try:
+        import onnx
+
+        onnx_model = onnx.load(str(model_path), load_external_data=False)
+        for init in onnx_model.graph.initializer:
+            if init.data_location == onnx.TensorProto.EXTERNAL:
+                for entry in init.external_data:
+                    if entry.key == "location":
+                        associated_files.add(entry.value)
+    except Exception:
+        logger.debug("Could not read ONNX external data from %s", model_path, exc_info=True)
+
+    for file_name in associated_files:
+        src = src_dir / file_name
+        if src.is_file():
+            shutil.copy2(str(src), str(dest_dir / file_name))
+
+
+def _remove_config_files(component_dir: Path, config_file_names: set[str]) -> None:
+    """Remove config files from variant subdirectories (they belong in configs/)."""
+    for name in config_file_names:
+        for p in component_dir.rglob(name):
+            if p.is_dir():
+                shutil.rmtree(str(p))
+            else:
+                p.unlink()
+            logger.debug("Removed duplicate config entry %s from variant directory", p)
+
+
+def _task_to_component_name(task: str) -> str:
+    """Map a task string to a component name for single-component models."""
+    task_component_map = {
+        "text_generation": "decoder",
+        "text2text_generation": "encoder_decoder",
+        "text_classification": "classifier",
+        "token_classification": "token_classifier",
+        "question_answering": "qa_model",
+        "image_generation": "image_generator",
+        "image_classification": "image_classifier",
+        "object_detection": "object_detector",
+        "automatic_speech_recognition": "speech_recognizer",
+    }
+    return task_component_map.get(task, "model")
diff --git a/olive/cli/optimize.py b/olive/cli/optimize.py
index 6d94c1407c..7782530a1b 100644
--- a/olive/cli/optimize.py
+++ b/olive/cli/optimize.py
@@ -582,7 +582,7 @@ def _get_matmul_nbits_to_qdq_pass_config(self) -> dict[str, Any]:
             "add_zero_point": "true",
             "save_as_external_data": "true",
         }
-        config["nodes_to_exclude"] = ["/lm_head/MatMul_Q4"]
+        config["nodes_to_exclude"] = ["/lm_head/MatMulNBits", "/lm_head/MatMul_Q4"]
         if precision.value == Precision.INT4:
             config["use_int4"] = "true"
         return config
diff --git a/olive/engine/engine.py b/olive/engine/engine.py
index de6b7019a3..5d7cee3f26 100644
--- a/olive/engine/engine.py
+++ b/olive/engine/engine.py
@@ -195,15 +195,14 @@ def run(
         self.initialize(log_to_file, log_severity_level)
 
         output_dir: Path = (Path(output_dir) if output_dir else Path.cwd()).resolve()
-        if output_dir.suffix:
+        # Treat as file path only if it has a suffix and is not an existing directory
+        is_file_path = output_dir.suffix and not output_dir.is_dir()
+        if is_file_path:
             output_dir.parent.mkdir(parents=True, exist_ok=True)
+            artifacts_dir = output_dir.parent
         else:
             output_dir.mkdir(parents=True, exist_ok=True)
-
-        # Determine the directory for artifacts (run_history, etc.)
-        # If output_dir is a file path (has suffix), use parent directory
-        # Otherwise use output_dir itself
-        artifacts_dir = output_dir.parent if output_dir.suffix else output_dir
+            artifacts_dir = output_dir
 
         logger.info("Running Olive on accelerator: %s", accelerator_spec)
         with self._create_system():
@@ -254,10 +253,8 @@ def run_accelerator(
 
         self.footprint.record(is_input_model=True, model_id=input_model_id)
 
-        # Determine the directory for artifacts
-        # If output_dir is a file path (has suffix like .onnx), use parent directory
-        # Otherwise use output_dir itself
-        artifacts_dir = output_dir.parent if output_dir.suffix else output_dir
+        # Artifacts directory: file path (has suffix, not existing dir) uses parent
+        artifacts_dir = output_dir.parent if (output_dir.suffix and not output_dir.is_dir()) else output_dir
 
         try:
             if evaluate_input_model and not self.evaluator_config:
diff --git a/olive/passes/olive_pass.py b/olive/passes/olive_pass.py
index 627202a0c7..c0062b5cf6 100644
--- a/olive/passes/olive_pass.py
+++ b/olive/passes/olive_pass.py
@@ -245,7 +245,7 @@ def run(self, model: OliveModelHandler, output_model_path: str) -> OliveModelHan
         # assumption: the model attributes from passes, if any, are more important than
         # the input model attributes, we should not update/extend anymore outside of the pass run
         output_model.model_attributes = output_model.model_attributes or model.model_attributes
-        # save and carry forward additional files into the the output model path
+        # save and carry forward additional files into the output model path
         Pass._carry_forward_additional_files(model, output_model)
 
         return output_model
@@ -287,7 +287,10 @@ def _carry_forward_additional_files(input_model: OliveModelHandler, output_model
             output_filepath = output_model_path / input_filepath.name
             if not output_filepath.exists():
                 # TODO(team): Use symlinks instead of copying the files.
-                shutil.copy(str(input_filepath), str(output_filepath))
+                if input_filepath.is_dir():
+                    shutil.copytree(str(input_filepath), str(output_filepath))
+                else:
+                    shutil.copy(str(input_filepath), str(output_filepath))
             # always add the file_path to the output model's additional files
             # this covers the case where the output model_path is the same as the input model_path
             # like for perf-tuning pass
diff --git a/olive/passes/onnx/context_binary.py b/olive/passes/onnx/context_binary.py
index d802fcc575..6747dc74f0 100644
--- a/olive/passes/onnx/context_binary.py
+++ b/olive/passes/onnx/context_binary.py
@@ -74,8 +74,6 @@ def _run_for_config(
         config: type[BasePassConfig],
         output_model_path: str,
     ) -> Union[ONNXModelHandler, CompositeModelHandler]:
-        from onnxruntime import __version__ as OrtVersion
-
         # session created using providers argument so will use the ort.get_available_providers()
         # TODO(jambayk): consider switching to the new EP API for Windows
         from onnxruntime import get_available_providers
@@ -89,6 +87,27 @@ def _run_for_config(
             f" {get_available_providers()}"
         )
 
+        result = self._run_single_target(model, config, output_model_path)
+
+        # Populate model_attributes with context binary metadata so it persists in model_config.json
+        result.model_attributes = {**(model.model_attributes or {}), **(result.model_attributes or {})}
+        result.model_attributes["ep"] = self.accelerator_spec.execution_provider
+        result.model_attributes["device"] = str(self.accelerator_spec.accelerator_type).upper()
+        if config.provider_options:
+            result.model_attributes["provider_options"] = config.provider_options
+            result.model_attributes["architecture"] = config.provider_options.get("soc_model")
+
+        return result
+
+    def _run_single_target(
+        self,
+        model: Union[ONNXModelHandler, CompositeModelHandler],
+        config: type[BasePassConfig],
+        output_model_path: str,
+    ) -> Union[ONNXModelHandler, CompositeModelHandler]:
+        """Generate context binary for a single target. This is the original logic."""
+        from onnxruntime import __version__ as OrtVersion
+
         generate_kwargs = {
             "execution_provider": self.accelerator_spec.execution_provider,
             "provider_options": config.provider_options,
diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py
index 978744ec1c..e2539fecac 100644
--- a/olive/passes/onnx/model_builder.py
+++ b/olive/passes/onnx/model_builder.py
@@ -214,12 +214,12 @@ def _run_for_config(
     ) -> ONNXModelHandler:
         try:
             from onnxruntime_genai.models.builder import create_model
-        except ImportError:
+        except ImportError as e:
             raise ImportError(
                 "onnxruntime-genai package is required to run ModelBuilder pass. Please install the package"
                 " corresponding to your onnxruntime installation using pip. cpu: onnxruntime-genai, cuda:"
                 " onnxruntime-genai-cuda, directml: onnxruntime-genai-directml"
-            ) from None
+            ) from e
 
         self.maybe_patch_quant()
         precision = config.precision
diff --git a/olive/passes/openvino/encapsulation.py b/olive/passes/openvino/encapsulation.py
index c8e24a2b37..055f13cf19 100644
--- a/olive/passes/openvino/encapsulation.py
+++ b/olive/passes/openvino/encapsulation.py
@@ -66,8 +66,7 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassCon
                 default_value=None,
                 required=False,
                 description=(
-                    "Name of the OpenVINO version to override in model SDK version."
-                    "Requires a minimum version of OpenVINO 2025.1"
+                    "OpenVINO version to override in model SDK version. Requires a minimum version of OpenVINO 2025.1"
                 ),
             ),
             "opset_imports": PassConfigParam(
@@ -115,6 +114,15 @@ def _run_for_config(
         config: type[BasePassConfig],
         output_model_path: str,
     ) -> ONNXModelHandler:
+        return self._run_single_target(model, config, output_model_path)
+
+    def _run_single_target(
+        self,
+        model: OpenVINOModelHandler,
+        config: type[BasePassConfig],
+        output_model_path: str,
+    ) -> ONNXModelHandler:
+        """Encapsulate a single OpenVINO model. This is the original logic."""
         try:
             import openvino as ov
         except ImportError:
@@ -245,7 +253,25 @@ def _run_for_config(
         # generate the genai_config.json file for GenAI models
         create_genai_config(context_model_output, output_model_path, config)
 
-        return ONNXModelHandler(model_path=output_model_path)
+        # Collect config files (non-model files) for downstream ModelPackage
+        output_path = Path(output_model_path)
+        model_suffixes = {".onnx", ".xml", ".bin"}
+        additional_files = [
+            str(f)
+            for f in sorted(output_path.iterdir())
+            if (f.is_file() and f.suffix not in model_suffixes) or f.is_dir()
+        ]
+
+        # Populate model_attributes with context binary metadata so it persists in model_config.json
+        context_binary_attrs = {
+            **(model.model_attributes or {}),
+            "ep": "OpenVINOExecutionProvider",
+            "device": str(config.target_device).upper(),
+            "sdk_version": ov_version,
+            "additional_files": additional_files,
+        }
+
+        return ONNXModelHandler(model_path=output_model_path, model_attributes=context_binary_attrs)
 
 
 def extract_shape_list(shape, config, prefix: str = "input_0_") -> list:
diff --git a/olive/passes/openvino/optimum_intel.py b/olive/passes/openvino/optimum_intel.py
index 2105f512f2..d898e665eb 100644
--- a/olive/passes/openvino/optimum_intel.py
+++ b/olive/passes/openvino/optimum_intel.py
@@ -3,6 +3,7 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 import logging
+import os
 from copy import deepcopy
 from pathlib import Path
 from typing import Any, Optional, Union
@@ -497,6 +498,17 @@ def _run_for_config(
             extra_args.pop("disable_convert_tokenizer", False)
             extra_args["library_name"] = lib_name
             extra_args.pop("library", None)
+
+            # Workaround for optimum-intel using Path.rename() which fails across filesystems.
+            # Set tempdir to output path so temp files are on the same filesystem as the cache.
+            import tempfile
+
+            Path(output_model_path).mkdir(parents=True, exist_ok=True)
+            original_tmpdir = os.environ.get("TMPDIR")
+            original_tempdir = tempfile.tempdir
+            os.environ["TMPDIR"] = output_model_path
+            tempfile.tempdir = output_model_path
+
             export_optimum_intel(
                 model.model_name_or_path,
                 output_model_path,
@@ -516,7 +528,13 @@ def _run_for_config(
                 model_kwargs=model.load_kwargs.__dict__ if model.load_kwargs else None,
             )
         except Exception as e:
-            raise RuntimeError(f"OpenVINO optimum export failed: {e}") from None
+            raise RuntimeError(f"OpenVINO optimum export failed: {e}") from e
+        finally:
+            tempfile.tempdir = original_tempdir
+            if original_tmpdir is None:
+                os.environ.pop("TMPDIR", None)
+            else:
+                os.environ["TMPDIR"] = original_tmpdir
 
         # check the exported components
         exported_models = [name.stem for name in Path(output_model_path).iterdir() if name.suffix == ".xml"]
diff --git a/olive/systems/system_config.py b/olive/systems/system_config.py
index dab5da3503..5addeadc61 100644
--- a/olive/systems/system_config.py
+++ b/olive/systems/system_config.py
@@ -6,7 +6,7 @@
 from pathlib import Path
 from typing import Optional, Union
 
-from pydantic import ConfigDict, Field, field_validator
+from pydantic import ConfigDict, Field, SerializeAsAny, field_validator
 
 from olive.common.config_utils import ConfigBase, NestedConfig, validate_config
 from olive.systems.common import AcceleratorConfig, SystemType
@@ -88,7 +88,7 @@ def import_system_from_type(system_type: SystemType):
 
 class SystemConfig(NestedConfig):
     type: SystemType
-    config: Optional[TargetUserConfig] = Field(default=None, validate_default=True)
+    config: Optional[SerializeAsAny[TargetUserConfig]] = Field(default=None, validate_default=True)
 
     @field_validator("config", mode="before")
     @classmethod
diff --git a/test/cli/test_model_package.py b/test/cli/test_model_package.py
new file mode 100644
index 0000000000..ba34485672
--- /dev/null
+++ b/test/cli/test_model_package.py
@@ -0,0 +1,147 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+# pylint: disable=protected-access
+import json
+from argparse import ArgumentParser
+
+import pytest
+
+from olive.cli.model_package import ModelPackageCommand
+
+
+def _create_source_dir(tmp_path, name, model_attributes):
+    """Create a fake Olive output directory with model_config.json and a dummy .onnx file."""
+    source_dir = tmp_path / name
+    source_dir.mkdir(parents=True)
+    model_config = {
+        "type": "ONNXModel",
+        "config": {"model_path": str(source_dir / "model.onnx"), "model_attributes": model_attributes},
+    }
+    (source_dir / "model_config.json").write_text(json.dumps(model_config))
+    (source_dir / "model.onnx").write_text("dummy")
+    return source_dir
+
+
+def _make_command(args_list):
+    """Create a ModelPackageCommand instance from CLI args."""
+    parser = ArgumentParser()
+    commands_parser = parser.add_subparsers()
+    ModelPackageCommand.register_subcommand(commands_parser)
+    parsed_args, unknown = parser.parse_known_args(args_list)
+    return parsed_args.func(parser, parsed_args, unknown)
+
+
+class TestSourceValidation:
+    """Tests for _parse_sources validation logic."""
+
+    def test_rejects_single_source(self, tmp_path):
+        # setup
+        src = _create_source_dir(tmp_path, "soc_60", {"ep": "QNNExecutionProvider"})
+        cmd = _make_command(["generate-model-package", "-s", str(src), "-o", str(tmp_path / "out")])
+
+        # execute + assert
+        with pytest.raises(ValueError, match="At least two"):
+            cmd._parse_sources()
+
+    def test_rejects_missing_model_config(self, tmp_path):
+        # setup
+        no_config = tmp_path / "no_config"
+        no_config.mkdir()
+        valid = _create_source_dir(tmp_path, "valid", {"ep": "QNNExecutionProvider"})
+        cmd = _make_command(
+            ["generate-model-package", "-s", str(no_config), "-s", str(valid), "-o", str(tmp_path / "out")]
+        )
+
+        # execute + assert
+        with pytest.raises(ValueError, match="model_config.json"):
+            cmd._parse_sources()
+
+    def test_rejects_nonexistent_path(self, tmp_path):
+        # setup
+        valid = _create_source_dir(tmp_path, "valid", {"ep": "QNNExecutionProvider"})
+        cmd = _make_command(
+            ["generate-model-package", "-s", "/nonexistent/path", "-s", str(valid), "-o", str(tmp_path / "out")]
+        )
+
+        # execute + assert
+        with pytest.raises(ValueError, match="does not exist"):
+            cmd._parse_sources()
+
+    def test_parses_two_valid_sources(self, tmp_path):
+        # setup
+        src1 = _create_source_dir(tmp_path, "soc_60", {"ep": "QNNExecutionProvider"})
+        src2 = _create_source_dir(tmp_path, "soc_73", {"ep": "QNNExecutionProvider"})
+        cmd = _make_command(["generate-model-package", "-s", str(src1), "-s", str(src2), "-o", str(tmp_path / "out")])
+
+        # execute
+        sources = cmd._parse_sources()
+
+        # assert
+        assert len(sources) == 2
+        assert sources[0] == ("soc_60", src1)
+        assert sources[1] == ("soc_73", src2)
+
+
+class TestGeneratePackageSingle:
+    """Tests for single-component model package generation."""
+
+    def test_generates_manifest_and_metadata(self, tmp_path):
+        """Package output should have manifest.json and metadata.json."""
+        # setup
+        src1 = _create_source_dir(tmp_path, "soc_60", {"ep": "QNNExecutionProvider", "device": "NPU"})
+        src2 = _create_source_dir(tmp_path, "soc_73", {"ep": "QNNExecutionProvider", "device": "NPU"})
+        out_dir = tmp_path / "out"
+        cmd = _make_command(
+            [
+                "generate-model-package",
+                "-s",
+                str(src1),
+                "-s",
+                str(src2),
+                "-o",
+                str(out_dir),
+                "--model_name",
+                "test_model",
+                "--model_version",
+                "2.0",
+            ]
+        )
+
+        # execute
+        cmd.run()
+
+        # assert: manifest
+        manifest_path = out_dir / "manifest.json"
+        assert manifest_path.exists()
+        manifest = json.loads(manifest_path.read_text())
+        assert manifest["name"] == "test_model"
+        assert manifest["model_version"] == "2.0"
+        assert "component_models" in manifest
+
+        # assert: metadata in component dir
+        component_name = manifest["component_models"][0]
+        metadata_path = out_dir / "models" / component_name / "metadata.json"
+        assert metadata_path.exists()
+        metadata = json.loads(metadata_path.read_text())
+        assert "soc_60" in metadata["model_variants"]
+        assert "soc_73" in metadata["model_variants"]
+
+        # assert: constraints
+        for variant in metadata["model_variants"].values():
+            assert variant["constraints"]["ep"] == "QNNExecutionProvider"
+            assert variant["constraints"]["device"] == "NPU"
+
+
+class TestAcceleratorInfo:
+    """Test accelerator info extraction."""
+
+    def test_defaults_accelerator_when_no_attributes(self):
+        """Falls back to CPUExecutionProvider/cpu when model_attributes is empty."""
+        # setup + execute
+        ep, device = ModelPackageCommand._extract_accelerator_info([{"type": "ONNXModel", "config": {}}])
+
+        # assert
+        assert ep == "CPUExecutionProvider"
+        assert device == "cpu"
diff --git a/test/passes/onnx/test_context_binary.py b/test/passes/onnx/test_context_binary.py
index deee87c550..7897167df3 100644
--- a/test/passes/onnx/test_context_binary.py
+++ b/test/passes/onnx/test_context_binary.py
@@ -132,3 +132,50 @@ def test_ep_context_binary_generator_composite(tmp_path, is_llm):
             assert expected_model_path.exists()
             if not is_skipped:
                 assert len(list(output_model_path.glob(f"{name}_ctx*.bin"))) == 1
+
+
+def _mock_get_available_providers():
+    return ["QNNExecutionProvider", "CPUExecutionProvider"]
+
+
+def test_single_target_populates_model_attributes(tmp_path):
+    """Single-target mode should populate model_attributes."""
+    from pathlib import Path
+    from unittest.mock import patch
+
+    accelerator_spec = AcceleratorSpec(accelerator_type="NPU", execution_provider="QNNExecutionProvider")
+
+    p = create_pass_from_dict(
+        EPContextBinaryGenerator,
+        {
+            "provider_options": {
+                "soc_model": "60",
+                "htp_performance_mode": "burst",
+            },
+        },
+        disable_search=True,
+        accelerator_spec=accelerator_spec,
+    )
+
+    with (
+        patch.object(EPContextBinaryGenerator, "_run_single_target") as mock_single,
+        patch("onnxruntime.get_available_providers", _mock_get_available_providers),
+    ):
+
+        def side_effect(model, config, output_model_path):
+            out_path = Path(output_model_path)
+            out_path.parent.mkdir(parents=True, exist_ok=True)
+            out_path.write_text("dummy")
+            return ONNXModelHandler(model_path=str(out_path))
+
+        mock_single.side_effect = side_effect
+
+        input_model = get_onnx_model()
+        output_path = str(tmp_path / "output.onnx")
+        result = p.run(input_model, output_path)
+
+        assert isinstance(result, ONNXModelHandler)
+        assert result.model_attributes["ep"] == "QNNExecutionProvider"
+        assert result.model_attributes["device"] == "NPU"
+        assert result.model_attributes["architecture"] == "60"
+        assert result.model_attributes["provider_options"]["soc_model"] == "60"
diff --git a/test/passes/openvino/test_openvino_encapsulation.py b/test/passes/openvino/test_openvino_encapsulation.py
index bfbc15a260..0387bf89b4 100644
--- a/test/passes/openvino/test_openvino_encapsulation.py
+++ b/test/passes/openvino/test_openvino_encapsulation.py
@@ -3,9 +3,12 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 from pathlib import Path
+from unittest.mock import MagicMock, patch
 
 import pytest
 
+from olive.hardware.accelerator import AcceleratorSpec, Device
+from olive.model import ONNXModelHandler
 from olive.passes.olive_pass import create_pass_from_dict
 from olive.passes.openvino.conversion import OpenVINOConversion
 from olive.passes.openvino.encapsulation import OpenVINOEncapsulation
@@ -101,3 +104,43 @@ def test_openvino_encapsulate_pass_dynamic_keep_ov_dynamic_dims(tmp_path):
     # assert
     assert Path(onnx_model.model_path).exists()
     assert (Path(onnx_model.model_path)).is_file()
+
+
+def test_single_target_populates_model_attributes(tmp_path):
+    accelerator_spec = AcceleratorSpec(accelerator_type=Device.NPU, execution_provider="OpenVINOExecutionProvider")
+
+    p = create_pass_from_dict(
+        OpenVINOEncapsulation,
+        {"ov_version": "2025.1", "target_device": "npu"},
+        disable_search=True,
+        accelerator_spec=accelerator_spec,
+    )
+
+    with patch.object(OpenVINOEncapsulation, "_run_single_target") as mock_single:
+
+        def side_effect(model, config, output_model_path):
+            out_dir = Path(output_model_path)
+            out_dir.parent.mkdir(parents=True, exist_ok=True)
+            out_dir.mkdir(parents=True, exist_ok=True)
+            model_file = out_dir / "model.onnx"
+            model_file.write_text("dummy")
+            return ONNXModelHandler(
+                model_path=str(model_file),
+                model_attributes={
+                    "ep": "OpenVINOExecutionProvider",
+                    "device": "NPU",
+                    "sdk_version": "2025.1",
+                    "architecture": "NPU",
+                },
+            )
+
+        mock_single.side_effect = side_effect
+
+        input_model = MagicMock()
+        input_model.model_attributes = {}
+        output_path = str(tmp_path / "output.onnx")
+        result = p.run(input_model, output_path)
+
+        assert isinstance(result, ONNXModelHandler)
+        assert result.model_attributes["ep"] == "OpenVINOExecutionProvider"
+        assert result.model_attributes["sdk_version"] == "2025.1"
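
A minimal end-to-end sketch of the new generate-model-package flow, assembled from the same parser plumbing the tests above use. The make_source helper, the soc_60/soc_73 directory names, and the attribute values are illustrative assumptions, not part of this change; real sources would be Olive output directories produced by a workflow run:

    import json
    from argparse import ArgumentParser
    from pathlib import Path

    from olive.cli.model_package import ModelPackageCommand


    def make_source(root: Path, name: str) -> Path:
        # Hypothetical helper: fabricates the model_config.json + model file
        # layout that _parse_sources validates (mirrors _create_source_dir in
        # test/cli/test_model_package.py).
        src = root / name
        src.mkdir(parents=True)
        config = {
            "type": "ONNXModel",
            "config": {
                "model_path": str(src / "model.onnx"),
                "model_attributes": {"ep": "QNNExecutionProvider", "device": "NPU"},
            },
        }
        (src / "model_config.json").write_text(json.dumps(config))
        (src / "model.onnx").write_text("dummy")
        return src


    root = Path("package_demo")  # illustrative scratch directory
    src1, src2 = make_source(root, "soc_60"), make_source(root, "soc_73")

    parser = ArgumentParser()
    ModelPackageCommand.register_subcommand(parser.add_subparsers())
    args, unknown = parser.parse_known_args(
        ["generate-model-package", "-s", str(src1), "-s", str(src2), "-o", str(root / "out")]
    )
    args.func(parser, args, unknown).run()
    # Expected layout per _package_single: out/manifest.json,
    # out/models/<component>/metadata.json, and
    # out/models/<component>/{soc_60,soc_73}/ holding the per-target model files.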