diff --git a/docs/source/reference/cli.rst b/docs/source/reference/cli.rst
index 6be2798fa3..76bec2fabf 100644
--- a/docs/source/reference/cli.rst
+++ b/docs/source/reference/cli.rst
@@ -170,6 +170,17 @@ Run benchmarking using llm-eval.
    :prog: olive
    :path: benchmark
 
+Generate Model Package
+======================
+
+Merge multiple model outputs into a model package with a manifest and per-component metadata.
+
+.. argparse::
+   :module: olive.cli.launcher
+   :func: get_cli_parser
+   :prog: olive
+   :path: generate-model-package
+
 Providing Input Models
 ======================
 
diff --git a/olive/cache.py b/olive/cache.py
index fe351057b9..22b13eae5b 100644
--- a/olive/cache.py
+++ b/olive/cache.py
@@ -384,10 +384,10 @@ def save_model(
     ):
         """Save a model from the cache to a given path."""
         output_dir = Path(output_dir) if output_dir else Path.cwd()
-
-        # If output_dir has a suffix (like .onnx), it's a file path
-        # Use parent directory for saving files
-        actual_output_dir = output_dir.parent if output_dir.suffix else output_dir
+        if output_dir.suffix and not output_dir.is_dir():
+            actual_output_dir = output_dir.parent
+        else:
+            actual_output_dir = output_dir
 
         actual_output_dir.mkdir(parents=True, exist_ok=True)
         model_json = self.load_model(model_id)
diff --git a/olive/cli/launcher.py b/olive/cli/launcher.py
index fed339f87d..55e6ffdeb4 100644
--- a/olive/cli/launcher.py
+++ b/olive/cli/launcher.py
@@ -17,6 +17,7 @@
 from olive.cli.generate_adapter import GenerateAdapterCommand
 from olive.cli.generate_cost_model import GenerateCostModelCommand
 from olive.cli.init import InitCommand
+from olive.cli.model_package import ModelPackageCommand
 from olive.cli.optimize import OptimizeCommand
 from olive.cli.quantize import QuantizeCommand
 from olive.cli.run import WorkflowRunCommand
@@ -54,6 +55,7 @@ def get_cli_parser(called_as_console_script: bool = True) -> ArgumentParser:
     ConfigureQualcommSDKCommand.register_subcommand(commands_parser)
     SharedCacheCommand.register_subcommand(commands_parser)
     ExtractAdaptersCommand.register_subcommand(commands_parser)
+    ModelPackageCommand.register_subcommand(commands_parser)
     BenchmarkCommand.register_subcommand(commands_parser)
 
     return parser
diff --git a/olive/cli/model_package.py b/olive/cli/model_package.py
new file mode 100644
index 0000000000..c88223d746
--- /dev/null
+++ b/olive/cli/model_package.py
@@ -0,0 +1,448 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import json
+import logging
+import shutil
+from argparse import ArgumentParser
+from pathlib import Path
+from typing import Optional
+
+from olive.cli.base import (
+    BaseOliveCLICommand,
+    add_logging_options,
+    add_telemetry_options,
+)
+from olive.telemetry import action
+
+logger = logging.getLogger(__name__)
+
+# Model file suffixes that belong in the models/ directory, not configs/
+_MODEL_SUFFIXES = {".onnx", ".bin", ".data", ".xml"}
+
+
+class ModelPackageCommand(BaseOliveCLICommand):
+    """Merge multiple Olive output directories into a model package with a manifest."""
+
+    @staticmethod
+    def register_subcommand(parser: ArgumentParser):
+        sub_parser = parser.add_parser(
+            "generate-model-package",
+            help="Merge multiple model outputs into a model package with a manifest",
+        )
+
+        sub_parser.add_argument(
+            "-s",
+            "--source",
+            type=str,
+            action="append",
+            required=True,
+            help="Source Olive output directory. Can be specified multiple times.",
+        )
+
+        sub_parser.add_argument(
+            "-o",
+            "--output_path",
+            type=str,
+            required=True,
+            help="Output directory for the merged model package.",
+        )
+
+        sub_parser.add_argument(
+            "--model_name",
+            type=str,
+            default=None,
+            help="Model name for the manifest. If not set, derived from the output directory name.",
+        )
+
+        sub_parser.add_argument(
+            "--model_version",
+            type=str,
+            default="1.0",
+            help="Model version string for the manifest. Default: 1.0",
+        )
+
+        add_logging_options(sub_parser)
+        add_telemetry_options(sub_parser)
+        sub_parser.set_defaults(func=ModelPackageCommand)
+
+    @action
+    def run(self):
+        sources = self._parse_sources()
+        output_dir = Path(self.args.output_path)
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        model_name = self.args.model_name or output_dir.name
+        model_version = self.args.model_version
+
+        # Read model configs from each source
+        targets = []
+        for target_name, source_path in sources:
+            model_config = self._read_model_config(source_path)
+            targets.append((target_name, source_path, model_config))
+
+        is_composite = targets[0][2].get("type") == "CompositeModel"
+        if is_composite:
+            self._package_composite(targets, output_dir, model_name, model_version)
+        else:
+            self._package_single(targets, output_dir, model_name, model_version)
+
+        logger.info("Model package generated at %s", output_dir)
+        # ruff: noqa: T201
+        print(f"Model package generated at {output_dir}")
+
+    # ------------------------------------------------------------------
+    # Single-component packaging
+    # ------------------------------------------------------------------
+
+    def _package_single(
+        self,
+        targets: list[tuple[str, Path, dict]],
+        output_dir: Path,
+        model_name: str,
+        model_version: str,
+    ) -> None:
+        """Package non-composite models (single ONNX per target)."""
+        config_file_names = self._copy_config_files(targets, output_dir)
+        task = self._extract_task(targets)
+        component_name = _task_to_component_name(task)
+
+        component_dir = output_dir / "models" / component_name
+        component_dir.mkdir(parents=True, exist_ok=True)
+
+        model_variants = {}
+        for target_name, _source_path, model_config in targets:
+            attrs = _get_model_attributes(model_config)
+            model_path = Path(model_config["config"]["model_path"])
+
+            target_dir = component_dir / target_name
+            _copy_model_files_single(model_path, target_dir)
+
+            constraints = _build_constraints(attrs, model_path)
+            model_variants[target_name] = {"file": model_path.name, "constraints": constraints}
+
+        _remove_config_files(component_dir, config_file_names)
+
+        metadata = {"name": component_name, "model_variants": model_variants}
+        _write_json(component_dir / "metadata.json", metadata)
+
+        manifest = {
+            "name": model_name,
+            "model_version": model_version,
+            "task": task,
+            "component_models": [component_name],
+        }
+        _write_json(output_dir / "manifest.json", manifest)
+
+    # ------------------------------------------------------------------
+    # Composite-model packaging
+    # ------------------------------------------------------------------
+
+    def _package_composite(
+        self,
+        targets: list[tuple[str, Path, dict]],
+        output_dir: Path,
+        model_name: str,
+        model_version: str,
+    ) -> None:
+        """Package composite models with per-component directory layout."""
+        config_file_names = self._copy_config_files(targets, output_dir)
+
+        # Collect component info: component_data[comp_name][target_name] = (comp_config, target_attrs)
+        from collections import OrderedDict
+
+        component_data: dict[str, dict] = OrderedDict()
+
+        for target_name, _source_path, model_config in targets:
+            target_attrs = _get_model_attributes(model_config)
+            components = model_config["config"].get("model_components", [])
+            component_names = model_config["config"].get("component_names", [])
+
+            for comp_config, comp_name in zip(components, component_names):
+                if comp_name not in component_data:
+                    component_data[comp_name] = OrderedDict()
+                component_data[comp_name][target_name] = (comp_config, target_attrs)
+
+        models_dir = output_dir / "models"
+        comp_names_list = list(component_data.keys())
+
+        for comp_name in comp_names_list:
+            comp_dir = models_dir / comp_name
+            comp_dir.mkdir(parents=True, exist_ok=True)
+
+            model_variants = {}
+            for target_name, (comp_config, target_attrs) in component_data[comp_name].items():
+                comp_model_path = Path(comp_config["config"]["model_path"])
+                target_dir = comp_dir / target_name
+                _copy_component_files(comp_model_path, target_dir)
+
+                constraints = _build_constraints(target_attrs, comp_model_path)
+                model_variants[target_name] = {"file": comp_model_path.name, "constraints": constraints}
+
+            _remove_config_files(comp_dir, config_file_names)
+
+            metadata = {"name": comp_name, "model_variants": model_variants}
+            _write_json(comp_dir / "metadata.json", metadata)
+
+        task = self._extract_task(targets)
+        manifest = {
+            "name": model_name,
+            "model_version": model_version,
+            "task": task,
+            "component_models": comp_names_list,
+        }
+        _write_json(output_dir / "manifest.json", manifest)
+
+    # ------------------------------------------------------------------
+    # Config file handling
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _copy_config_files(
+        targets: list[tuple[str, Path, dict]],
+        output_dir: Path,
+    ) -> set[str]:
+        """Copy non-model config files (genai_config, tokenizer, etc.) to configs/."""
+        config_entries: dict[str, Path] = {}
+
+        # Collect from the first target's additional_files or source directory
+        for _target_name, _source_path, model_config in targets:
+            attrs = _get_model_attributes(model_config)
+            for fp in attrs.get("additional_files", []):
+                p = Path(fp)
+                if (p.is_file() or p.is_dir()) and p.name not in config_entries:
+                    config_entries[p.name] = p
+            if config_entries:
+                break
+
+        # Fall back to scanning the source directory for non-model files
+        if not config_entries:
+            for _target_name, source_path, _model_config in targets:
+                for f in sorted(source_path.iterdir()):
+                    if f.name == "model_config.json":
+                        continue
+                    if (f.is_file() and f.suffix not in _MODEL_SUFFIXES) or f.is_dir():
+                        config_entries[f.name] = f
+                if config_entries:
+                    break
+
+        if not config_entries:
+            return set()
+
+        configs_dir = output_dir / "configs"
+        configs_dir.mkdir(parents=True, exist_ok=True)
+
+        for name, src_path in config_entries.items():
+            dest = configs_dir / name
+            if src_path.is_dir():
+                if not dest.exists():
+                    shutil.copytree(str(src_path), str(dest))
+            else:
+                shutil.copy2(str(src_path), str(dest))
+            logger.info("Copied %s to %s", name, configs_dir)
+
+        return set(config_entries.keys())
+
+    # ------------------------------------------------------------------
+    # Source validation and reading
+    # ------------------------------------------------------------------
+
+    def _parse_sources(self) -> list[tuple[str, Path]]:
+        sources = []
+        for source in self.args.source:
+            path = Path(source)
+            if not path.is_dir():
+                raise ValueError(f"Source path does not exist or is not a directory: {path}")
+
+            if not (path / "model_config.json").exists():
+                raise ValueError(
+                    f"No model_config.json found in {path}. "
+                    "Source must be an Olive output directory with model_config.json."
+                )
+
+            sources.append((path.name, path))
+
+        if len(sources) < 2:
+            raise ValueError("At least two --source directories are required to merge.")
+
+        return sources
+
+    @staticmethod
+    def _read_model_config(source_path: Path) -> dict:
+        config_path = source_path / "model_config.json"
+        with open(config_path) as f:
+            return json.load(f)
+
+    @staticmethod
+    def _extract_accelerator_info(target_models: list[dict]) -> tuple[str, str]:
+        for model_config in target_models:
+            attrs = model_config.get("config", {}).get("model_attributes") or {}
+            ep = attrs.get("ep", "CPUExecutionProvider")
+            device = attrs.get("device", "cpu")
+            return ep, device.lower()
+        return "CPUExecutionProvider", "cpu"
+
+    # ------------------------------------------------------------------
+    # Task extraction
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _extract_task(targets: list[tuple[str, Path, dict]]) -> str:
+        """Extract the HuggingFace pipeline task for the model."""
+        model_name_or_path = ""
+        for _target_name, _source_path, model_config in targets:
+            attrs = _get_model_attributes(model_config)
+            model_name_or_path = attrs.get("_name_or_path", "")
+            if model_name_or_path:
+                break
+
+        if not model_name_or_path:
+            return ""
+
+        try:
+            from huggingface_hub import model_info
+
+            info = model_info(model_name_or_path)
+            tag = info.pipeline_tag or ""
+            return tag.replace("-", "_")
+        except Exception:
+            logger.debug("Could not fetch task from HuggingFace Hub for %s", model_name_or_path, exc_info=True)
+            return ""
+
+
+# ------------------------------------------------------------------
+# Module-level helpers
+# ------------------------------------------------------------------
+
+
+def _get_model_attributes(model_config: dict) -> dict:
+    return model_config.get("config", {}).get("model_attributes") or {}
+
+
+def _write_json(path: Path, data: dict) -> None:
+    with open(path, "w") as f:
+        json.dump(data, f, indent=2)
+    logger.info("Generated %s", path)
+
+
+def _build_constraints(attrs: dict, model_path: Path) -> dict:
+    """Build variant constraints from model attributes and ONNX metadata."""
+    constraints = {}
+    ep = attrs.get("ep")
+    if ep:
+        constraints["ep"] = ep
+    device = attrs.get("device")
+    if device:
+        constraints["device"] = device
+    ep_compat = _extract_ep_compatibility_from_onnx(model_path, ep or "")
+    constraints["ep_compatibility_info"] = ep_compat or ""
+    return constraints
+
+
+def _extract_ep_compatibility_from_onnx(model_path: Path, ep: str = "") -> Optional[str]:
+    """Extract ep_compatibility_info from ONNX model custom metadata."""
+    if not model_path.is_file():
+        return None
+
+    try:
+        import onnx
+
+        onnx_model = onnx.load(str(model_path), load_external_data=False)
+        prefix = "ep_compatibility_info."
+        ep_compat_map = {
+            entry.key[len(prefix) :]: entry.value for entry in onnx_model.metadata_props if entry.key.startswith(prefix)
+        }
+    except Exception:
+        logger.debug("Could not read ONNX metadata from %s", model_path, exc_info=True)
+        return None
+
+    if not ep_compat_map:
+        return None
+    if ep and ep in ep_compat_map:
+        return ep_compat_map[ep]
+    if len(ep_compat_map) == 1:
+        return next(iter(ep_compat_map.values()))
+    return None
+
+
+def _copy_model_files_single(model_path: Path, dest_dir: Path) -> None:
+    """Copy model files for a single ONNX model into dest_dir."""
+    if dest_dir.exists():
+        return
+
+    src_dir = model_path.parent if model_path.is_file() else model_path
+    if src_dir.is_dir():
+        shutil.copytree(str(src_dir), str(dest_dir))
+    else:
+        dest_dir.mkdir(parents=True, exist_ok=True)
+        shutil.copy2(str(model_path), str(dest_dir))
+
+
+def _copy_component_files(model_path: Path, dest_dir: Path) -> None:
+    """Copy files for a single ONNX component to dest_dir.
+
+    Copies the .onnx file and its associated context binary (.bin) files
+    and external data files.
+    """
+    if dest_dir.exists():
+        return
+
+    dest_dir.mkdir(parents=True, exist_ok=True)
+    src_dir = model_path.parent
+
+    # Copy the ONNX file itself
+    shutil.copy2(str(model_path), str(dest_dir / model_path.name))
+
+    # Find associated files
+    associated_files: set[str] = set()
+    try:
+        from olive.passes.onnx.common import get_context_bin_file_names
+
+        associated_files.update(get_context_bin_file_names(str(model_path)))
+    except Exception:
+        logger.debug("Could not read context binary file names from %s", model_path, exc_info=True)
+
+    try:
+        import onnx
+
+        onnx_model = onnx.load(str(model_path), load_external_data=False)
+        for init in onnx_model.graph.initializer:
+            if init.data_location == onnx.TensorProto.EXTERNAL:
+                for entry in init.external_data:
+                    if entry.key == "location":
+                        associated_files.add(entry.value)
+    except Exception:
+        logger.debug("Could not read ONNX external data from %s", model_path, exc_info=True)
+
+    for file_name in associated_files:
+        src = src_dir / file_name
+        if src.is_file():
+            shutil.copy2(str(src), str(dest_dir / file_name))
+
+
+def _remove_config_files(component_dir: Path, config_file_names: set[str]) -> None:
+    """Remove config files from variant subdirectories (they belong in configs/)."""
+    for name in config_file_names:
+        for p in component_dir.rglob(name):
+            if p.is_dir():
+                shutil.rmtree(str(p))
+            else:
+                p.unlink()
+            logger.debug("Removed duplicate config entry %s from variant directory", p)
+
+
+def _task_to_component_name(task: str) -> str:
+    """Map a task string to a component name for single-component models."""
+    task_component_map = {
+        "text_generation": "decoder",
+        "text2text_generation": "encoder_decoder",
+        "text_classification": "classifier",
+        "token_classification": "token_classifier",
+        "question_answering": "qa_model",
+        "image_generation": "image_generator",
+        "image_classification": "image_classifier",
+        "object_detection": "object_detector",
+        "automatic_speech_recognition": "speech_recognizer",
+    }
+    return task_component_map.get(task, "model")
diff --git a/olive/cli/optimize.py b/olive/cli/optimize.py
index 6d94c1407c..7782530a1b 100644
--- a/olive/cli/optimize.py
+++ b/olive/cli/optimize.py
@@ -582,7 +582,7 @@ def _get_matmul_nbits_to_qdq_pass_config(self) -> dict[str, Any]:
             "add_zero_point": "true",
             "save_as_external_data": "true",
         }
-        config["nodes_to_exclude"] = ["/lm_head/MatMul_Q4"]
+        config["nodes_to_exclude"] = ["/lm_head/MatMulNBits", "/lm_head/MatMul_Q4"]
         if precision.value == Precision.INT4:
             config["use_int4"] = "true"
         return config
diff --git a/olive/engine/engine.py b/olive/engine/engine.py
index de6b7019a3..5d7cee3f26 100644
--- a/olive/engine/engine.py
+++ b/olive/engine/engine.py
@@ -195,15 +195,14 @@ def run(
         self.initialize(log_to_file, log_severity_level)
 
         output_dir: Path = (Path(output_dir) if output_dir else Path.cwd()).resolve()
-        if output_dir.suffix:
+        # Treat as file path only if it has a suffix and is not an existing directory
+        is_file_path = output_dir.suffix and not output_dir.is_dir()
+        if is_file_path:
             output_dir.parent.mkdir(parents=True, exist_ok=True)
+            artifacts_dir = output_dir.parent
         else:
             output_dir.mkdir(parents=True, exist_ok=True)
-
-        # Determine the directory for artifacts (run_history, etc.)
-        # If output_dir is a file path (has suffix), use parent directory
-        # Otherwise use output_dir itself
-        artifacts_dir = output_dir.parent if output_dir.suffix else output_dir
+            artifacts_dir = output_dir
 
         logger.info("Running Olive on accelerator: %s", accelerator_spec)
         with self._create_system():
@@ -254,10 +253,8 @@ def run_accelerator(
 
         self.footprint.record(is_input_model=True, model_id=input_model_id)
 
-        # Determine the directory for artifacts
-        # If output_dir is a file path (has suffix like .onnx), use parent directory
-        # Otherwise use output_dir itself
-        artifacts_dir = output_dir.parent if output_dir.suffix else output_dir
+        # Artifacts directory: file path (has suffix, not existing dir) uses parent
+        artifacts_dir = output_dir.parent if (output_dir.suffix and not output_dir.is_dir()) else output_dir
 
         try:
             if evaluate_input_model and not self.evaluator_config:
diff --git a/olive/passes/olive_pass.py b/olive/passes/olive_pass.py
index 627202a0c7..c0062b5cf6 100644
--- a/olive/passes/olive_pass.py
+++ b/olive/passes/olive_pass.py
@@ -245,7 +245,7 @@ def run(self, model: OliveModelHandler, output_model_path: str) -> OliveModelHan
         # assumption: the model attributes from passes, if any, are more important than
         # the input model attributes, we should not update/extend anymore outside of the pass run
         output_model.model_attributes = output_model.model_attributes or model.model_attributes
-        # save and carry forward additional files into the the output model path
+        # save and carry forward additional files into the output model path
         Pass._carry_forward_additional_files(model, output_model)
 
         return output_model
@@ -287,7 +287,10 @@ def _carry_forward_additional_files(input_model: OliveModelHandler, output_model
             output_filepath = output_model_path / input_filepath.name
             if not output_filepath.exists():
                 # TODO(team): Use symlinks instead of copying the files.
-                shutil.copy(str(input_filepath), str(output_filepath))
+                if input_filepath.is_dir():
+                    shutil.copytree(str(input_filepath), str(output_filepath))
+                else:
+                    shutil.copy(str(input_filepath), str(output_filepath))
             # always add the file_path to the output model's additional files
             # this covers the case where the output model_path is the same as the input model_path
             # like for perf-tuning pass
diff --git a/olive/passes/onnx/context_binary.py b/olive/passes/onnx/context_binary.py
index d802fcc575..6747dc74f0 100644
--- a/olive/passes/onnx/context_binary.py
+++ b/olive/passes/onnx/context_binary.py
@@ -74,8 +74,6 @@ def _run_for_config(
         config: type[BasePassConfig],
         output_model_path: str,
     ) -> Union[ONNXModelHandler, CompositeModelHandler]:
-        from onnxruntime import __version__ as OrtVersion
-
         # session created using providers argument so will use the ort.get_available_providers()
         # TODO(jambayk): consider switching to the new EP API for Windows
         from onnxruntime import get_available_providers
@@ -89,6 +87,27 @@ def _run_for_config(
             f" {get_available_providers()}"
         )
 
+        result = self._run_single_target(model, config, output_model_path)
+
+        # Populate model_attributes with context binary metadata so it persists in model_config.json
+        result.model_attributes = {**(model.model_attributes or {}), **(result.model_attributes or {})}
+        result.model_attributes["ep"] = self.accelerator_spec.execution_provider
+        result.model_attributes["device"] = str(self.accelerator_spec.accelerator_type).upper()
+        if config.provider_options:
+            result.model_attributes["provider_options"] = config.provider_options
+            result.model_attributes["architecture"] = config.provider_options.get("soc_model")
+
+        return result
+
+    def _run_single_target(
+        self,
+        model: Union[ONNXModelHandler, CompositeModelHandler],
+        config: type[BasePassConfig],
+        output_model_path: str,
+    ) -> Union[ONNXModelHandler, CompositeModelHandler]:
+        """Generate context binary for a single target. This is the original logic."""
+        from onnxruntime import __version__ as OrtVersion
+
         generate_kwargs = {
             "execution_provider": self.accelerator_spec.execution_provider,
             "provider_options": config.provider_options,
diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py
index 978744ec1c..e2539fecac 100644
--- a/olive/passes/onnx/model_builder.py
+++ b/olive/passes/onnx/model_builder.py
@@ -214,12 +214,12 @@ def _run_for_config(
     ) -> ONNXModelHandler:
         try:
             from onnxruntime_genai.models.builder import create_model
-        except ImportError:
+        except ImportError as e:
             raise ImportError(
                 "onnxruntime-genai package is required to run ModelBuilder pass. Please install the package"
                 " corresponding to your onnxruntime installation using pip. cpu: onnxruntime-genai, cuda:"
                 " onnxruntime-genai-cuda, directml: onnxruntime-genai-directml"
-            ) from None
+            ) from e
 
         self.maybe_patch_quant()
         precision = config.precision
diff --git a/olive/passes/openvino/encapsulation.py b/olive/passes/openvino/encapsulation.py
index c8e24a2b37..055f13cf19 100644
--- a/olive/passes/openvino/encapsulation.py
+++ b/olive/passes/openvino/encapsulation.py
@@ -66,8 +66,7 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassCon
                 default_value=None,
                 required=False,
                 description=(
-                    "Name of the OpenVINO version to override in model SDK version."
-                    "Requires a minimum version of OpenVINO 2025.1"
+                    "OpenVINO version to override in model SDK version. Requires a minimum version of OpenVINO 2025.1"
                 ),
             ),
             "opset_imports": PassConfigParam(
@@ -115,6 +114,15 @@ def _run_for_config(
         config: type[BasePassConfig],
         output_model_path: str,
     ) -> ONNXModelHandler:
+        return self._run_single_target(model, config, output_model_path)
+
+    def _run_single_target(
+        self,
+        model: OpenVINOModelHandler,
+        config: type[BasePassConfig],
+        output_model_path: str,
+    ) -> ONNXModelHandler:
+        """Encapsulate a single OpenVINO model. This is the original logic."""
         try:
             import openvino as ov
         except ImportError:
@@ -245,7 +253,25 @@ def _run_for_config(
         # generate the genai_config.json file for GenAI models
         create_genai_config(context_model_output, output_model_path, config)
 
-        return ONNXModelHandler(model_path=output_model_path)
+        # Collect config files (non-model files) for downstream ModelPackage
+        output_path = Path(output_model_path)
+        model_suffixes = {".onnx", ".xml", ".bin"}
+        additional_files = [
+            str(f)
+            for f in sorted(output_path.iterdir())
+            if (f.is_file() and f.suffix not in model_suffixes) or f.is_dir()
+        ]
+
+        # Populate model_attributes with context binary metadata so it persists in model_config.json
+        context_binary_attrs = {
+            **(model.model_attributes or {}),
+            "ep": "OpenVINOExecutionProvider",
+            "device": str(config.target_device).upper(),
+            "sdk_version": ov_version,
+            "additional_files": additional_files,
+        }
+
+        return ONNXModelHandler(model_path=output_model_path, model_attributes=context_binary_attrs)
 
 
 def extract_shape_list(shape, config, prefix: str = "input_0_") -> list:
diff --git a/olive/passes/openvino/optimum_intel.py b/olive/passes/openvino/optimum_intel.py
index 2105f512f2..d898e665eb 100644
--- a/olive/passes/openvino/optimum_intel.py
+++ b/olive/passes/openvino/optimum_intel.py
@@ -3,6 +3,7 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 import logging
+import os
 from copy import deepcopy
 from pathlib import Path
 from typing import Any, Optional, Union
@@ -497,6 +498,17 @@ def _run_for_config(
             extra_args.pop("disable_convert_tokenizer", False)
             extra_args["library_name"] = lib_name
             extra_args.pop("library", None)
+
+            # Workaround for optimum-intel using Path.rename() which fails across filesystems.
+            # Set tempdir to output path so temp files are on the same filesystem as the cache.
+            import tempfile
+
+            Path(output_model_path).mkdir(parents=True, exist_ok=True)
+            original_tmpdir = os.environ.get("TMPDIR")
+            original_tempdir = tempfile.tempdir
+            os.environ["TMPDIR"] = output_model_path
+            tempfile.tempdir = output_model_path
+
             export_optimum_intel(
                 model.model_name_or_path,
                 output_model_path,
@@ -516,7 +528,13 @@ def _run_for_config(
                 model_kwargs=model.load_kwargs.__dict__ if model.load_kwargs else None,
             )
         except Exception as e:
-            raise RuntimeError(f"OpenVINO optimum export failed: {e}") from None
+            raise RuntimeError(f"OpenVINO optimum export failed: {e}") from e
+        finally:
+            tempfile.tempdir = original_tempdir
+            if original_tmpdir is None:
+                os.environ.pop("TMPDIR", None)
+            else:
+                os.environ["TMPDIR"] = original_tmpdir
 
         # check the exported components
         exported_models = [name.stem for name in Path(output_model_path).iterdir() if name.suffix == ".xml"]
diff --git a/olive/systems/system_config.py b/olive/systems/system_config.py
index dab5da3503..5addeadc61 100644
--- a/olive/systems/system_config.py
+++ b/olive/systems/system_config.py
@@ -6,7 +6,7 @@
 from pathlib import Path
 from typing import Optional, Union
 
-from pydantic import ConfigDict, Field, field_validator
+from pydantic import ConfigDict, Field, SerializeAsAny, field_validator
 
 from olive.common.config_utils import ConfigBase, NestedConfig, validate_config
 from olive.systems.common import AcceleratorConfig, SystemType
@@ -88,7 +88,7 @@ def import_system_from_type(system_type: SystemType):
 
 class SystemConfig(NestedConfig):
     type: SystemType
-    config: Optional[TargetUserConfig] = Field(default=None, validate_default=True)
+    config: Optional[SerializeAsAny[TargetUserConfig]] = Field(default=None, validate_default=True)
 
     @field_validator("config", mode="before")
     @classmethod
diff --git a/test/cli/test_model_package.py b/test/cli/test_model_package.py
new file mode 100644
index 0000000000..ba34485672
--- /dev/null
+++ b/test/cli/test_model_package.py
@@ -0,0 +1,147 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+# pylint: disable=protected-access
+import json
+from argparse import ArgumentParser
+
+import pytest
+
+from olive.cli.model_package import ModelPackageCommand
+
+
+def _create_source_dir(tmp_path, name, model_attributes):
+    """Create a fake Olive output directory with model_config.json and a dummy .onnx file."""
+    source_dir = tmp_path / name
+    source_dir.mkdir(parents=True)
+    model_config = {
+        "type": "ONNXModel",
+        "config": {"model_path": str(source_dir / "model.onnx"), "model_attributes": model_attributes},
+    }
+    (source_dir / "model_config.json").write_text(json.dumps(model_config))
+    (source_dir / "model.onnx").write_text("dummy")
+    return source_dir
+
+
+def _make_command(args_list):
+    """Create a ModelPackageCommand instance from CLI args."""
+    parser = ArgumentParser()
+    commands_parser = parser.add_subparsers()
+    ModelPackageCommand.register_subcommand(commands_parser)
+    parsed_args, unknown = parser.parse_known_args(args_list)
+    return parsed_args.func(parser, parsed_args, unknown)
+
+
+class TestSourceValidation:
+    """Tests for _parse_sources validation logic."""
+
+    def test_rejects_single_source(self, tmp_path):
+        # setup
+        src = _create_source_dir(tmp_path, "soc_60", {"ep": "QNNExecutionProvider"})
+        cmd = _make_command(["generate-model-package", "-s", str(src), "-o", str(tmp_path / "out")])
+
+        # execute + assert
+        with pytest.raises(ValueError, match="At least two"):
+            cmd._parse_sources()
+
+    def test_rejects_missing_model_config(self, tmp_path):
+        # setup
+        no_config = tmp_path / "no_config"
+        no_config.mkdir()
+        valid = _create_source_dir(tmp_path, "valid", {"ep": "QNNExecutionProvider"})
+        cmd = _make_command(
+            ["generate-model-package", "-s", str(no_config), "-s", str(valid), "-o", str(tmp_path / "out")]
+        )
+
+        # execute + assert
+        with pytest.raises(ValueError, match="model_config.json"):
+            cmd._parse_sources()
+
+    def test_rejects_nonexistent_path(self, tmp_path):
+        # setup
+        valid = _create_source_dir(tmp_path, "valid", {"ep": "QNNExecutionProvider"})
+        cmd = _make_command(
+            ["generate-model-package", "-s", "/nonexistent/path", "-s", str(valid), "-o", str(tmp_path / "out")]
+        )
+
+        # execute + assert
+        with pytest.raises(ValueError, match="does not exist"):
+            cmd._parse_sources()
+
+    def test_parses_two_valid_sources(self, tmp_path):
+        # setup
+        src1 = _create_source_dir(tmp_path, "soc_60", {"ep": "QNNExecutionProvider"})
+        src2 = _create_source_dir(tmp_path, "soc_73", {"ep": "QNNExecutionProvider"})
+        cmd = _make_command(["generate-model-package", "-s", str(src1), "-s", str(src2), "-o", str(tmp_path / "out")])
+
+        # execute
+        sources = cmd._parse_sources()
+
+        # assert
+        assert len(sources) == 2
+        assert sources[0] == ("soc_60", src1)
+        assert sources[1] == ("soc_73", src2)
+
+
+class TestGeneratePackageSingle:
+    """Tests for single-component model package generation."""
+
+    def test_generates_manifest_and_metadata(self, tmp_path):
+        """Package output should have manifest.json and metadata.json."""
+        # setup
+        src1 = _create_source_dir(tmp_path, "soc_60", {"ep": "QNNExecutionProvider", "device": "NPU"})
+        src2 = _create_source_dir(tmp_path, "soc_73", {"ep": "QNNExecutionProvider", "device": "NPU"})
+        out_dir = tmp_path / "out"
+        cmd = _make_command(
+            [
+                "generate-model-package",
+                "-s",
+                str(src1),
+                "-s",
+                str(src2),
+                "-o",
+                str(out_dir),
+                "--model_name",
+                "test_model",
+                "--model_version",
+                "2.0",
+            ]
+        )
+
+        # execute
+        cmd.run()
+
+        # assert: manifest
+        manifest_path = out_dir / "manifest.json"
+        assert manifest_path.exists()
+        manifest = json.loads(manifest_path.read_text())
+        assert manifest["name"] == "test_model"
+        assert manifest["model_version"] == "2.0"
+        assert "component_models" in manifest
+
+        # assert: metadata in component dir
+        component_name = manifest["component_models"][0]
+        metadata_path = out_dir / "models" / component_name / "metadata.json"
+        assert metadata_path.exists()
+        metadata = json.loads(metadata_path.read_text())
+        assert "soc_60" in metadata["model_variants"]
+        assert "soc_73" in metadata["model_variants"]
+
+        # assert: constraints
+        for variant in metadata["model_variants"].values():
+            assert variant["constraints"]["ep"] == "QNNExecutionProvider"
+            assert variant["constraints"]["device"] == "NPU"
+
+
+class TestAcceleratorInfo:
+    """Test accelerator info extraction."""
+
+    def test_defaults_accelerator_when_no_attributes(self):
+        """Falls back to CPUExecutionProvider/cpu when model_attributes is empty."""
+        # setup + execute
+        ep, device = ModelPackageCommand._extract_accelerator_info([{"type": "ONNXModel", "config": {}}])
+
+        # assert
+        assert ep == "CPUExecutionProvider"
+        assert device == "cpu"
diff --git a/test/passes/onnx/test_context_binary.py b/test/passes/onnx/test_context_binary.py
index deee87c550..7897167df3 100644
--- a/test/passes/onnx/test_context_binary.py
+++ b/test/passes/onnx/test_context_binary.py
@@ -132,3 +132,50 @@ def test_ep_context_binary_generator_composite(tmp_path, is_llm):
             assert expected_model_path.exists()
             if not is_skipped:
                 assert len(list(output_model_path.glob(f"{name}_ctx*.bin"))) == 1
+
+
+def _mock_get_available_providers():
+    return ["QNNExecutionProvider", "CPUExecutionProvider"]
+
+
+def test_single_target_populates_model_attributes(tmp_path):
+    """Single-target mode should populate model_attributes."""
+    from pathlib import Path
+    from unittest.mock import patch
+
+    accelerator_spec = AcceleratorSpec(accelerator_type="NPU", execution_provider="QNNExecutionProvider")
+
+    p = create_pass_from_dict(
+        EPContextBinaryGenerator,
+        {
+            "provider_options": {
+                "soc_model": "60",
+                "htp_performance_mode": "burst",
+            },
+        },
+        disable_search=True,
+        accelerator_spec=accelerator_spec,
+    )
+
+    with (
+        patch.object(EPContextBinaryGenerator, "_run_single_target") as mock_single,
+        patch("onnxruntime.get_available_providers", _mock_get_available_providers),
+    ):
+
+        def side_effect(model, config, output_model_path):
+            out_path = Path(output_model_path)
+            out_path.parent.mkdir(parents=True, exist_ok=True)
+            out_path.write_text("dummy")
+            return ONNXModelHandler(model_path=str(out_path))
+
+        mock_single.side_effect = side_effect
+
+        input_model = get_onnx_model()
+        output_path = str(tmp_path / "output.onnx")
+        result = p.run(input_model, output_path)
+
+        assert isinstance(result, ONNXModelHandler)
+        assert result.model_attributes["ep"] == "QNNExecutionProvider"
+        assert result.model_attributes["device"] == "NPU"
+        assert result.model_attributes["architecture"] == "60"
+        assert result.model_attributes["provider_options"]["soc_model"] == "60"
diff --git a/test/passes/openvino/test_openvino_encapsulation.py b/test/passes/openvino/test_openvino_encapsulation.py
index bfbc15a260..0387bf89b4 100644
--- a/test/passes/openvino/test_openvino_encapsulation.py
+++ b/test/passes/openvino/test_openvino_encapsulation.py
@@ -3,9 +3,12 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 from pathlib import Path
+from unittest.mock import MagicMock, patch
 
 import pytest
 
+from olive.hardware.accelerator import AcceleratorSpec, Device
+from olive.model import ONNXModelHandler
 from olive.passes.olive_pass import create_pass_from_dict
 from olive.passes.openvino.conversion import OpenVINOConversion
 from olive.passes.openvino.encapsulation import OpenVINOEncapsulation
@@ -101,3 +104,43 @@ def test_openvino_encapsulate_pass_dynamic_keep_ov_dynamic_dims(tmp_path):
     # assert
     assert Path(onnx_model.model_path).exists()
     assert (Path(onnx_model.model_path)).is_file()
+
+
+def test_single_target_populates_model_attributes(tmp_path):
+    accelerator_spec = AcceleratorSpec(accelerator_type=Device.NPU, execution_provider="OpenVINOExecutionProvider")
+
+    p = create_pass_from_dict(
+        OpenVINOEncapsulation,
+        {"ov_version": "2025.1", "target_device": "npu"},
+        disable_search=True,
+        accelerator_spec=accelerator_spec,
+    )
+
+    with patch.object(OpenVINOEncapsulation, "_run_single_target") as mock_single:
+
+        def side_effect(model, config, output_model_path):
+            out_dir = Path(output_model_path)
+            out_dir.parent.mkdir(parents=True, exist_ok=True)
+            out_dir.mkdir(parents=True, exist_ok=True)
+            model_file = out_dir / "model.onnx"
+            model_file.write_text("dummy")
+            return ONNXModelHandler(
+                model_path=str(model_file),
+                model_attributes={
+                    "ep": "OpenVINOExecutionProvider",
+                    "device": "NPU",
+                    "sdk_version": "2025.1",
+                    "architecture": "NPU",
+                },
+            )
+
+        mock_single.side_effect = side_effect
+
+        input_model = MagicMock()
+        input_model.model_attributes = {}
+        output_path = str(tmp_path / "output.onnx")
+        result = p.run(input_model, output_path)
+
+        assert isinstance(result, ONNXModelHandler)
+        assert result.model_attributes["ep"] == "OpenVINOExecutionProvider"
+        assert result.model_attributes["sdk_version"] == "2025.1"
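
A minimal end-to-end sketch of the new generate-model-package flow, assembled from the same parser plumbing the tests above use. The make_source helper, the soc_60/soc_73 directory names, and the attribute values are illustrative assumptions, not part of this change; real sources would be Olive output directories produced by a workflow run:

    import json
    from argparse import ArgumentParser
    from pathlib import Path

    from olive.cli.model_package import ModelPackageCommand


    def make_source(root: Path, name: str) -> Path:
        # Hypothetical helper: fabricates the model_config.json + model file
        # layout that _parse_sources validates (mirrors _create_source_dir in
        # test/cli/test_model_package.py).
        src = root / name
        src.mkdir(parents=True)
        config = {
            "type": "ONNXModel",
            "config": {
                "model_path": str(src / "model.onnx"),
                "model_attributes": {"ep": "QNNExecutionProvider", "device": "NPU"},
            },
        }
        (src / "model_config.json").write_text(json.dumps(config))
        (src / "model.onnx").write_text("dummy")
        return src


    root = Path("package_demo")  # illustrative scratch directory
    src1, src2 = make_source(root, "soc_60"), make_source(root, "soc_73")

    parser = ArgumentParser()
    ModelPackageCommand.register_subcommand(parser.add_subparsers())
    args, unknown = parser.parse_known_args(
        ["generate-model-package", "-s", str(src1), "-s", str(src2), "-o", str(root / "out")]
    )
    args.func(parser, args, unknown).run()
    # Expected layout per _package_single: out/manifest.json,
    # out/models/<component>/metadata.json, and
    # out/models/<component>/{soc_60,soc_73}/ holding the per-target model files.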