Skip to content
Merged
11 changes: 11 additions & 0 deletions docs/source/reference/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,17 @@ Run benchmarking using llm-eval.
:prog: olive
:path: benchmark

Generate Model Package
======================

Merge multiple model outputs into a single model package with a manifest and per-component metadata.

.. argparse::
:module: olive.cli.launcher
:func: get_cli_parser
:prog: olive
:path: generate-model-package

Providing Input Models
======================

Expand Down
6 changes: 6 additions & 0 deletions docs/source/reference/pass.rst
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,12 @@ EPContextBinaryGenerator
------------------------
.. autoconfigclass:: olive.passes.EPContextBinaryGenerator

.. _model_package:

ModelPackage
------------
.. autoconfigclass:: olive.passes.ModelPackage

.. _compose_onnx_models:

ComposeOnnxModels
Expand Down
56 changes: 51 additions & 5 deletions olive/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,14 +384,60 @@ def save_model(
):
"""Save a model from the cache to a given path."""
output_dir = Path(output_dir) if output_dir else Path.cwd()

# If output_dir has a suffix (like .onnx), it's a file path
# Use parent directory for saving files
actual_output_dir = output_dir.parent if output_dir.suffix else output_dir
if output_dir.suffix and not output_dir.is_dir():
actual_output_dir = output_dir.parent
else:
actual_output_dir = output_dir
actual_output_dir.mkdir(parents=True, exist_ok=True)

model_json = self.load_model(model_id)
if model_json["type"].lower() == "compositemodel":
if model_json["type"].lower() == "modelpackagemodel":
Comment thread
jambayk marked this conversation as resolved.
Outdated
model_json_config = model_json["config"]
source_path = Path(model_json_config["model_path"])
actual_output_dir.mkdir(parents=True, exist_ok=True)

if source_path.exists():
# Only copy target subdirectories (soc_60/, soc_73/, etc.) and manifest.json.
# Skip top-level additional_files (tokenizer, config) since each target subdir has its own copy.
for item in source_path.iterdir():
dest = actual_output_dir / item.name
if item.is_dir():
shutil.copytree(str(item), str(dest), dirs_exist_ok=overwrite)
elif item.name == "manifest.json":
shutil.copy2(str(item), str(dest))

# Update paths to point to new location
model_json_config["model_path"] = str(actual_output_dir)

# Update target model paths
for target_model in model_json_config.get("target_models", []):
target_config = target_model.get("config", {})
old_model_path = target_config.get("model_path", "")
if old_model_path and str(source_path) in old_model_path:
target_config["model_path"] = old_model_path.replace(str(source_path), str(actual_output_dir))

# Clear additional_files since each target subdir has its own copies
model_attributes = model_json_config.get("model_attributes") or {}
model_attributes.pop("additional_files", None)

# Update manifest_path
if model_attributes.get("manifest_path"):
model_attributes["manifest_path"] = str(
actual_output_dir / Path(model_attributes["manifest_path"]).name
)

# Update manifest name: if pass config set model_name explicitly, keep it;
# otherwise update to the output directory name (e.g., "qwen_2.5_1.5b_Instruct")
manifest_file = actual_output_dir / "manifest.json"
if manifest_file.exists():
manifest = json.loads(manifest_file.read_text())
# The pass defaults model_name to the cache dir name (not meaningful).
# Replace it with the final output directory name unless it was explicitly configured.
source_dir_name = source_path.name if source_path else None
if not manifest.get("name") or manifest.get("name") == source_dir_name:
manifest["name"] = actual_output_dir.name
manifest_file.write_text(json.dumps(manifest, indent=2))
elif model_json["type"].lower() == "compositemodel":
model_json_config = model_json["config"]
model_attributes = model_json_config.get("model_attributes") or {}

Expand Down
2 changes: 2 additions & 0 deletions olive/cli/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from olive.cli.generate_adapter import GenerateAdapterCommand
from olive.cli.generate_cost_model import GenerateCostModelCommand
from olive.cli.init import InitCommand
from olive.cli.model_package import ModelPackageCommand
from olive.cli.optimize import OptimizeCommand
from olive.cli.quantize import QuantizeCommand
from olive.cli.run import WorkflowRunCommand
Expand Down Expand Up @@ -54,6 +55,7 @@ def get_cli_parser(called_as_console_script: bool = True) -> ArgumentParser:
ConfigureQualcommSDKCommand.register_subcommand(commands_parser)
SharedCacheCommand.register_subcommand(commands_parser)
ExtractAdaptersCommand.register_subcommand(commands_parser)
ModelPackageCommand.register_subcommand(commands_parser)
BenchmarkCommand.register_subcommand(commands_parser)

return parser
Expand Down
144 changes: 144 additions & 0 deletions olive/cli/model_package.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import json
import logging
from argparse import ArgumentParser
from pathlib import Path
from typing import Any

from olive.cli.base import (
BaseOliveCLICommand,
add_logging_options,
add_save_config_file_options,
add_telemetry_options,
)
from olive.telemetry import action

logger = logging.getLogger(__name__)


class ModelPackageCommand(BaseOliveCLICommand):
    """Merge multiple model outputs into a model package via the ModelPackage pass.

    Every ``--source`` must be a directory containing a ``model_config.json``.
    The command reads each of those configs, wraps them in a
    ``ModelPackageModel`` input model and builds a workflow config with a
    single ``ModelPackage`` pass.
    """

    @staticmethod
    def register_subcommand(parser: ArgumentParser):
        """Register the ``generate-model-package`` sub-command on ``parser``.

        Args:
            parser: Object exposing ``add_parser`` on which the sub-command
                and its options are registered.

        """
        sub_parser = parser.add_parser(
            "generate-model-package",
            help="Merge multiple model outputs into a model package with manifest",
        )

        sub_parser.add_argument(
            "-s",
            "--source",
            type=str,
            action="append",
            required=True,
            help="Source Olive output directory. Can be specified multiple times.",
        )

        sub_parser.add_argument(
            "-o",
            "--output_path",
            type=str,
            required=True,
            help="Output directory for the merged model package.",
        )

        sub_parser.add_argument(
            "--model_name",
            type=str,
            default=None,
            help="Model name for the manifest. If not set, derived from the output directory name.",
        )

        sub_parser.add_argument(
            "--model_version",
            type=str,
            default="1.0",
            help="Model version string for the manifest. Default: 1.0",
        )

        add_logging_options(sub_parser)
        add_save_config_file_options(sub_parser)
        add_telemetry_options(sub_parser)
        sub_parser.set_defaults(func=ModelPackageCommand)

    def _get_run_config(self, tempdir: str) -> dict[str, Any]:
        """Build the workflow run config for the packaging run.

        Args:
            tempdir: Directory used as the ``model_path`` of the input
                ``ModelPackageModel``.

        Returns:
            The run config dictionary containing the input model, a local
            system, the ``ModelPackage`` pass and the output settings.

        Raises:
            ValueError: Propagated from ``_parse_sources`` when the sources
                given on the command line are invalid.

        """
        sources = self._parse_sources()

        # Keep names and configs in the same order: entry i of one list
        # describes entry i of the other.
        target_names = [target_name for target_name, _ in sources]
        target_models = [self._read_model_config(source_path) for _, source_path in sources]

        ep, device = self._extract_accelerator_info(target_models)

        return {
            "input_model": {
                "type": "ModelPackageModel",
                "target_models": target_models,
                "target_names": target_names,
                "model_path": tempdir,
            },
            "systems": {
                "local_system": {
                    "type": "LocalSystem",
                    "accelerators": [{"device": device, "execution_providers": [ep]}],
                }
            },
            "passes": {
                "pkg": {
                    "type": "ModelPackage",
                    "model_name": self.args.model_name,
                    "model_version": self.args.model_version,
                }
            },
            "output_dir": self.args.output_path,
            "host": "local_system",
            "target": "local_system",
            "log_severity_level": self.args.log_level,
            "no_artifacts": True,
        }

    @action
    def run(self):
        """Run the packaging workflow and return its result."""
        return self._run_workflow()

    def _parse_sources(self) -> list[tuple[str, Path]]:
        """Validate the ``--source`` arguments.

        The target name of a source is the final component of its path.

        Returns:
            ``(target_name, source_path)`` pairs in command-line order.

        Raises:
            ValueError: If a source is not a directory, has no
                ``model_config.json``, shares its directory name with an
                earlier source, or fewer than two sources were given.

        """
        sources = []
        seen_names = set()
        for source in self.args.source:
            path = Path(source)
            if not path.is_dir():
                raise ValueError(f"Source path does not exist or is not a directory: {path}")

            if not (path / "model_config.json").exists():
                raise ValueError(
                    f"No model_config.json found in {path}. "
                    "Source must be an Olive output directory with model_config.json."
                )

            target_name = path.name
            # Two sources such as a/out and b/out would both be named "out".
            # NOTE(review): presumably targets are keyed by this name inside the
            # package, so a clash would silently overwrite one of them - fail early.
            if target_name in seen_names:
                raise ValueError(
                    f"Duplicate target name '{target_name}' derived from {path}. "
                    "Each --source directory must have a unique directory name."
                )
            seen_names.add(target_name)
            sources.append((target_name, path))

        if len(sources) < 2:
            raise ValueError("At least two --source directories are required to merge.")

        return sources

    @staticmethod
    def _read_model_config(source_path: Path) -> dict:
        """Load ``model_config.json`` from ``source_path`` and return the parsed JSON."""
        config_path = source_path / "model_config.json"
        # Explicit encoding so parsing does not depend on the platform locale.
        with open(config_path, encoding="utf-8") as f:
            return json.load(f)

    @staticmethod
    def _extract_accelerator_info(target_models: list[dict]) -> tuple[str, str]:
        """Return the ``(execution_provider, device)`` pair used for the local system.

        Only the first target model is consulted. Its ``ep`` and ``device``
        model attributes are used; anything missing or null falls back to
        ``CPUExecutionProvider`` / ``cpu``. The device is lower-cased.

        Args:
            target_models: Parsed ``model_config.json`` dictionaries.

        Returns:
            The execution provider name and the lower-cased device name.

        """
        default_ep, default_device = "CPUExecutionProvider", "cpu"
        if not target_models:
            return default_ep, default_device

        # `or` (rather than a .get default) also covers keys that are present
        # but null in the JSON, which would otherwise break the lookups below.
        first_config = target_models[0].get("config") or {}
        attrs = first_config.get("model_attributes") or {}
        ep = attrs.get("ep") or default_ep
        device = attrs.get("device") or default_device
        return ep, device.lower()
2 changes: 1 addition & 1 deletion olive/cli/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,7 @@ def _get_matmul_nbits_to_qdq_pass_config(self) -> dict[str, Any]:
"add_zero_point": "true",
"save_as_external_data": "true",
}
config["nodes_to_exclude"] = ["/lm_head/MatMul_Q4"]
config["nodes_to_exclude"] = ["/lm_head/MatMulNBits"]
Comment thread
jambayk marked this conversation as resolved.
Outdated
if precision.value == Precision.INT4:
config["use_int4"] = "true"
return config
Expand Down
17 changes: 7 additions & 10 deletions olive/engine/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,15 +195,14 @@ def run(
self.initialize(log_to_file, log_severity_level)

output_dir: Path = (Path(output_dir) if output_dir else Path.cwd()).resolve()
if output_dir.suffix:
# Treat as file path only if it has a suffix and is not an existing directory
is_file_path = output_dir.suffix and not output_dir.is_dir()
if is_file_path:
output_dir.parent.mkdir(parents=True, exist_ok=True)
artifacts_dir = output_dir.parent
else:
output_dir.mkdir(parents=True, exist_ok=True)

# Determine the directory for artifacts (run_history, etc.)
# If output_dir is a file path (has suffix), use parent directory
# Otherwise use output_dir itself
artifacts_dir = output_dir.parent if output_dir.suffix else output_dir
artifacts_dir = output_dir
Comment thread
xiaoyu-work marked this conversation as resolved.

logger.info("Running Olive on accelerator: %s", accelerator_spec)
with self._create_system():
Expand Down Expand Up @@ -254,10 +253,8 @@ def run_accelerator(

self.footprint.record(is_input_model=True, model_id=input_model_id)

# Determine the directory for artifacts
# If output_dir is a file path (has suffix like .onnx), use parent directory
# Otherwise use output_dir itself
artifacts_dir = output_dir.parent if output_dir.suffix else output_dir
# Artifacts directory: file path (has suffix, not existing dir) uses parent
artifacts_dir = output_dir.parent if (output_dir.suffix and not output_dir.is_dir()) else output_dir

try:
if evaluate_input_model and not self.evaluator_config:
Expand Down
2 changes: 2 additions & 0 deletions olive/model/handler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from olive.model.handler.composite import CompositeModelHandler
from olive.model.handler.diffusers import DiffusersModelHandler
from olive.model.handler.hf import DistributedHfModelHandler, HfModelHandler
from olive.model.handler.model_package import ModelPackageModelHandler
from olive.model.handler.onnx import DistributedOnnxModelHandler, ONNXModelHandler
from olive.model.handler.openvino import OpenVINOModelHandler
from olive.model.handler.pytorch import PyTorchModelHandler
Expand All @@ -19,6 +20,7 @@
"DistributedHfModelHandler",
"DistributedOnnxModelHandler",
"HfModelHandler",
"ModelPackageModelHandler",
"ONNXModelHandler",
"OliveModelHandler",
"OpenVINOModelHandler",
Expand Down
Loading
Loading