-
Notifications
You must be signed in to change notification settings - Fork 450
Add anymodel-core to feature/puzzletron #974
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
e82164f
2099df3
eb5cf8a
c9de41c
3c1bc1f
6cc2194
ee4e1e3
449b523
fb27bba
b350f82
fafe5a3
c717852
030f126
70df0df
ee8f538
47414d5
a8305d8
68421a5
d6b8028
ecd2341
f9d845d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| """Converters for transforming HuggingFace models to AnyModel format.""" | ||
|
|
||
| from .convert_any_model import * | ||
| from .converter import * | ||
| from .converter_factory import * |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,68 @@ | ||
| # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| # mypy: ignore-errors | ||
|
|
||
| """Convert a HuggingFace model to AnyModel format.""" | ||
|
|
||
| from pathlib import Path | ||
|
|
||
| from modelopt.torch.puzzletron.anymodel.converter.converter import Converter | ||
| from modelopt.torch.puzzletron.anymodel.converter.converter_factory import ConverterFactory | ||
| from modelopt.torch.puzzletron.anymodel.model_descriptor import ModelDescriptorFactory | ||
|
|
||
| __all__ = ["convert_model"] | ||
|
|
||
|
|
||
def convert_model(
    input_dir: str,
    output_dir: str,
    converter: str,
) -> None:
    """Convert a HuggingFace model to AnyModel format.

    This function converts a HuggingFace checkpoint to the AnyModel format used
    for compression. The conversion process:

    1. Copies non-weight files (config, tokenizer, etc.)
    2. Creates block_configs for each layer
    3. Reorganizes weights into subblock checkpoints

    Args:
        input_dir: Path to the input HuggingFace checkpoint directory.
        output_dir: Path to the output AnyModel checkpoint directory. It is
            created (including missing parents) if it does not exist yet.
        converter: Registered converter name (e.g., "llama"). The same name is
            used to look up both the model descriptor and the converter.

    Raises:
        TypeError: If ``converter`` is not a string. The descriptor lookup is
            keyed by name, so a ``Converter`` class cannot be used to resolve it.

    Example:
        >>> convert_model(
        ...     input_dir="/path/to/Llama-3.1-8B-Instruct",
        ...     output_dir="/path/to/output/ckpts/teacher",
        ...     converter="llama",
        ... )
    """
    # Validate before touching the filesystem so a bad argument leaves no
    # half-created output directory behind.
    if not isinstance(converter, str):
        raise TypeError(
            "converter must be the name of a registered converter (str), "
            f"got {type(converter).__name__}"
        )

    input_path = Path(input_dir)
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Get descriptor and converter from factories (they use the same name)
    descriptor = ModelDescriptorFactory.get(converter)
    converter_cls = ConverterFactory.get(converter)

    converter_cls.convert(descriptor=descriptor, input_dir=input_path, output_dir=output_path)
|
|
||
|
|
||
if __name__ == "__main__":
    # Expose convert_model as a command-line entry point through python-fire.
    import fire

    fire.Fire(convert_model)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,235 @@ | ||
| # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| # mypy: ignore-errors | ||
|
|
||
| import copy | ||
| import fnmatch | ||
| import json | ||
| import os | ||
| import shutil | ||
| from abc import ABC, abstractmethod | ||
| from collections import defaultdict | ||
| from pathlib import Path | ||
| from typing import Dict, List | ||
|
|
||
| from safetensors.torch import load_file, save_file | ||
| from tqdm import tqdm | ||
| from transformers import PretrainedConfig | ||
| from transformers.integrations.mxfp4 import convert_moe_packed_tensors | ||
|
|
||
| from modelopt.torch.puzzletron.anymodel.model_descriptor import ModelDescriptor | ||
| from modelopt.torch.puzzletron.decilm.deci_lm_hf_code.block_config import BlockConfig | ||
| from modelopt.torch.puzzletron.tools.checkpoint_utils_hf import load_model_config, save_model_config | ||
|
|
||
| __all__ = ["Converter"] | ||
|
|
||
|
|
||
class Converter(ABC):
    """Base class for converting HuggingFace models to Puzzletron/AnyModel format.

    Subclasses must implement :meth:`create_block_configs_from_main_config` and
    may override :meth:`convert_weight_name`. If a subclass defines the class
    attribute ``quantized = "mxfp4"``, every ``*_blocks`` tensor is combined with
    its ``*_scales`` sibling through ``convert_moe_packed_tensors`` while the
    weights are regrouped.
    """

    @staticmethod
    def _get_weight_map(input_dir: Path) -> Dict[str, str]:
        """Load weight map from checkpoint directory (sharded or single-file).

        Args:
            input_dir: Directory containing either ``model.safetensors.index.json``
                or ``model.safetensors``.

        Returns:
            A dict mapping parameter names to their safetensors filenames.

        Raises:
            FileNotFoundError: If neither the index file nor the single
                safetensors file exists in ``input_dir``.
        """
        index_path = input_dir / "model.safetensors.index.json"
        single_file_path = input_dir / "model.safetensors"

        if index_path.exists():
            # Sharded model: the index already records which shard holds each tensor.
            with open(index_path, "r", encoding="utf-8") as f:
                index = json.load(f)
            return index["weight_map"]

        if single_file_path.exists():
            # Single file model - create a synthetic weight map
            data = load_file(single_file_path)
            return {name: "model.safetensors" for name in data}

        raise FileNotFoundError(
            f"Neither {index_path} nor {single_file_path} found. Cannot determine model format."
        )

    @classmethod
    def convert_model_weights(
        cls,
        input_dir: Path,
        output_dir: Path,
        descriptor: "ModelDescriptor",
        num_hidden_layers: int,
    ) -> None:
        """Convert model weights to subblock format.

        Parameters are grouped with ``descriptor.get_weight_groups`` and each group
        is written to ``<output_dir>/subblocks_safetensors/<group>.safetensors``.
        A new ``model.safetensors.index.json`` pointing at those files is written
        to ``output_dir``.

        Args:
            input_dir: Directory of the source checkpoint.
            output_dir: Directory receiving the subblock files and the new index.
            descriptor: Model descriptor that decides how parameters are grouped.
            num_hidden_layers: Number of hidden layers, forwarded to the descriptor.

        Raises:
            FileNotFoundError: If the source weight map cannot be located.
            KeyError: In mxfp4 mode, if a ``*_blocks`` tensor has no matching
                ``*_scales`` tensor in the same safetensors file.
        """
        blocks_suffix = "_blocks"
        scales_suffix = "_scales"

        # Use cls so that subclasses can customise how the weight map is discovered.
        param_to_file = cls._get_weight_map(input_dir)
        all_param_names = list(param_to_file)

        # Determine subblocks needed
        subblocks = descriptor.get_weight_groups(
            all_param_names, num_hidden_layers=num_hidden_layers
        )

        # Output directory
        out_dir = output_dir / "subblocks_safetensors"
        os.makedirs(out_dir, exist_ok=True)

        # New weight index
        new_index = {"metadata": {"format": "pt"}, "weight_map": {}}

        # Convert MoE packed tensors if quantized is mxfp4 (e.g. gpt-oss-20b)
        is_mxfp4 = getattr(cls, "quantized", None) == "mxfp4"

        for subblock, param_names in tqdm(subblocks.items(), desc="Processing subblocks"):
            # Sorted so that shards are loaded in a deterministic order.
            param_files = sorted({param_to_file[name] for name in param_names})
            tensors = {}

            # Load only needed files for this subblock
            for file in param_files:
                data = load_file(os.path.join(input_dir, file))
                for name in param_names:
                    if param_to_file[name] != file or name not in data:
                        continue

                    converted_name = cls.convert_weight_name(name)

                    if is_mxfp4 and name.endswith(scales_suffix):
                        # Scales are consumed together with their "_blocks" partner.
                        continue

                    if is_mxfp4 and name.endswith(blocks_suffix):
                        # `data` is keyed by the ORIGINAL checkpoint names, so both
                        # lookups must use `name`; the converted name is only valid
                        # as the output key (it may have been renamed by a subclass).
                        scales_name = name[: -len(blocks_suffix)] + scales_suffix
                        if scales_name not in data:
                            raise KeyError(
                                f"Missing scales tensor {scales_name!r} for {name!r} in {file}"
                            )
                        converted_name = converted_name.replace("_blocks", "")
                        tensors[converted_name] = convert_moe_packed_tensors(
                            data[name],
                            data[scales_name],
                        )
                    else:
                        tensors[converted_name] = data[name]

            # Save this subblock
            print(f"\n✅ Group: {subblock} ({len(tensors)} layers)")
            for layer in tensors:
                print(f" - {layer}")

            subblock_file = f"{subblock}.safetensors"
            save_file(tensors, os.path.join(out_dir, subblock_file))

            # Update index
            for new_name in tensors:
                new_index["weight_map"][new_name] = f"subblocks_safetensors/{subblock_file}"

        # Save new index file
        with (output_dir / "model.safetensors.index.json").open("w", encoding="utf-8") as f:
            json.dump(new_index, f, indent=2)

        print(f"✅ Finished saving subblocks and index to {output_dir}")

    @classmethod
    def convert_configs_in_dirs(
        cls,
        input_dir: Path,
        output_dir: Path,
    ):
        """Convert config and add block_configs.

        Args:
            input_dir: Directory the model config is loaded from.
            output_dir: Directory the augmented config is saved to.

        Returns:
            A deep copy of the loaded config with ``block_configs`` set to the
            result of :meth:`create_block_configs_from_main_config`.
        """
        config = load_model_config(input_dir)

        block_configs = cls.create_block_configs_from_main_config(config)
        # Work on a copy so the loaded config object is left untouched.
        out_config = copy.deepcopy(config)
        out_config.block_configs = block_configs

        save_model_config(out_config, output_dir)
        return out_config

    @staticmethod
    def copy_checkpoint_files(input_dir: Path, output_dir: Path) -> None:
        """Copy checkpoint files except model weights (which will be converted).

        Args:
            input_dir: Source checkpoint directory.
            output_dir: Destination directory; existing directories are reused.
        """
        # Weight shards, the weight index and any previous conversion output are
        # skipped; everything else is copied as-is.
        skip_weights = shutil.ignore_patterns(
            "model-*.safetensors",
            "model.safetensors",
            "model.safetensors.index.json",
            "subblocks_safetensors",
        )

        shutil.copytree(str(input_dir), str(output_dir), ignore=skip_weights, dirs_exist_ok=True)

    @classmethod
    def convert(
        cls,
        descriptor: "ModelDescriptor",
        input_dir: Path,
        output_dir: Path,
    ) -> None:
        """Convert a HuggingFace model to AnyModel format.

        Runs the three conversion steps in order: copy the non-weight files,
        write the config with ``block_configs``, then regroup the weights.

        Args:
            descriptor: Model descriptor for the model type.
            input_dir: Path to the input HuggingFace checkpoint.
            output_dir: Path to the output AnyModel checkpoint.
        """
        cls.copy_checkpoint_files(input_dir, output_dir)
        config = cls.convert_configs_in_dirs(input_dir, output_dir)
        cls.convert_model_weights(
            input_dir, output_dir, descriptor=descriptor, num_hidden_layers=config.num_hidden_layers
        )

    @staticmethod
    @abstractmethod
    def create_block_configs_from_main_config(config: "PretrainedConfig") -> List["BlockConfig"]:
        """Create per-layer BlockConfig list from a HuggingFace model config.

        This method extracts layer-specific parameters (e.g., intermediate_size,
        num_key_value_heads) from the main model config and creates a BlockConfig
        for each layer. These BlockConfigs enable layer-specific pruning and
        modifications during the compression pipeline.

        Args:
            config: HuggingFace PretrainedConfig (e.g., LlamaConfig, Qwen2Config)

        Returns:
            List of BlockConfig, one per hidden layer. Each BlockConfig contains:
            - AttentionConfig: attention settings (no_op, num_key_value_heads)
            - FFNConfig: FFN settings (no_op, intermediate_size)

        Example:
            For a model with uniform layers (e.g., Llama):
                return [BlockConfig(...)] * config.num_hidden_layers

            For a model with heterogeneous layers (e.g., NemotronH with Mamba/Attention):
                return [BlockConfig(...) for layer_idx in range(num_layers)]
        """
        raise NotImplementedError

    @staticmethod
    def convert_weight_name(name: str) -> str:
        """Convert weight names during checkpoint conversion.

        This method can be overridden by subclasses to apply model-specific weight name
        transformations when converting checkpoints from HuggingFace format to Puzzletron format.

        Default implementation returns the name unchanged (identity function).

        Args:
            name: Original weight name from HuggingFace checkpoint

        Returns:
            Converted weight name for Puzzletron format

        Example:
            For Qwen2.5-VL, this converts:
            - visual.* → model.visual.*
            - model.* → model.language_model.*
        """
        return name
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `converter` API contract is currently inconsistent with the implementation. The function documents support for passing a `Converter` class, but the current resolution path expects a registry key and can produce an invalid `descriptor` object when a class is passed.

✅ Proposed fix (restrict and validate)

```diff
 def convert_model(
     input_dir: str,
     output_dir: str,
-    converter: Converter | str,
+    converter: str,
 ):
@@
-    descriptor = ModelDescriptorFactory.get(converter)
-    converter = ConverterFactory.get(converter)
-
-    converter.convert(descriptor=descriptor, input_dir=input_dir, output_dir=output_dir)
+    descriptor = ModelDescriptorFactory.get(converter)
+    converter_cls = ConverterFactory.get(converter)
+    converter_cls.convert(descriptor=descriptor, input_dir=input_dir, output_dir=output_dir)
```

Also update the docstring to state that `converter` is a registered converter name.

🤖 Prompt for AI Agents