-
Notifications
You must be signed in to change notification settings - Fork 450
Support Mixed precision & Static MSE in MCore; Nemotron Super v3 NVFP4 recipe #1521
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
676eac4
9143e02
57e5f26
2dea94a
918ed6a
05a436f
ff20eca
985da85
d88e54a
5f291a6
5c9cd43
c5c7a2e
d63bf70
e14fa62
e985e93
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,9 +22,21 @@ | |
|
|
||
| import torch | ||
| from huggingface_hub import snapshot_download | ||
| from huggingface_hub.errors import LocalEntryNotFoundError | ||
| from safetensors.torch import safe_open | ||
| from tqdm import tqdm | ||
|
|
||
| _HF_HUB_OFFLINE_TRUE_VALUES = {"1", "ON", "YES", "TRUE"} | ||
|
|
||
|
|
||
| def _is_hf_hub_offline() -> bool: | ||
| return os.environ.get("HF_HUB_OFFLINE", "").strip().upper() in _HF_HUB_OFFLINE_TRUE_VALUES | ||
|
|
||
|
|
||
| def _copy_python_files(source_dir: Path, save_dir: Path) -> None: | ||
| for py_file in source_dir.glob("*.py"): | ||
| shutil.copy2(py_file, save_dir / py_file.name) | ||
|
Comment on lines +36 to +38
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Copy Python sidecars recursively to avoid missing package modules. Line 37 only scans top-level `*.py` files, so Python modules in subdirectories are not copied.

💡 Proposed fix

```diff
 def _copy_python_files(source_dir: Path, save_dir: Path) -> None:
-    for py_file in source_dir.glob("*.py"):
-        shutil.copy2(py_file, save_dir / py_file.name)
+    for py_file in source_dir.rglob("*.py"):
+        rel = py_file.relative_to(source_dir)
+        target = save_dir / rel
+        target.parent.mkdir(parents=True, exist_ok=True)
+        shutil.copy2(py_file, target)
```

🤖 Prompt for AI Agents |
||
|
|
||
|
|
||
| def copy_hf_ckpt_remote_code( | ||
| pretrained_model_path: str | os.PathLike, save_directory: str | os.PathLike | ||
|
|
@@ -36,7 +48,10 @@ def copy_hf_ckpt_remote_code( | |
| frameworks. | ||
|
|
||
| If ``pretrained_model_path`` is a local directory, Python files are copied directly. | ||
| If it's a HF Hub model ID (e.g. ``nvidia/NVIDIA-Nemotron-Nano-12B-v2``), files are downloaded from the Hub. | ||
| If it's a HF Hub model ID (e.g. ``nvidia/NVIDIA-Nemotron-Nano-12B-v2``), the Hub | ||
| snapshot is resolved first and Python files are copied from that snapshot. When | ||
| ``HF_HUB_OFFLINE`` is set, the snapshot must already be available in the local | ||
| Hugging Face cache. | ||
|
|
||
| Args: | ||
| pretrained_model_path: Local path to the pretrained model or HuggingFace Hub model ID. | ||
|
|
@@ -47,14 +62,28 @@ def copy_hf_ckpt_remote_code( | |
| save_dir.mkdir(parents=True, exist_ok=True) | ||
|
|
||
| if hf_checkpoint_path.is_dir(): | ||
| for py_file in hf_checkpoint_path.glob("*.py"): | ||
| shutil.copy2(py_file, save_dir / py_file.name) | ||
| _copy_python_files(hf_checkpoint_path, save_dir) | ||
| else: | ||
| snapshot_download( | ||
| repo_id=str(pretrained_model_path), | ||
| local_dir=str(save_dir), | ||
| allow_patterns=["*.py"], | ||
| ) | ||
| local_files_only = _is_hf_hub_offline() | ||
| try: | ||
| source_dir = Path( | ||
| snapshot_download( | ||
| repo_id=str(pretrained_model_path), | ||
| allow_patterns=["*.py"], | ||
| local_files_only=local_files_only, | ||
| ) | ||
| ) | ||
| except LocalEntryNotFoundError as exc: | ||
| if local_files_only: | ||
| raise RuntimeError( | ||
| f"Could not copy Python sidecar files for {pretrained_model_path!r} because " | ||
| "HF_HUB_OFFLINE is enabled and the files are not available in the local " | ||
| "Hugging Face cache. Populate the cache with the model's *.py files or pass " | ||
| "a local pretrained model directory." | ||
| ) from exc | ||
| raise | ||
|
|
||
| _copy_python_files(source_dir, save_dir) | ||
|
|
||
|
|
||
| def load_multimodal_components( | ||
|
|
@@ -123,3 +152,34 @@ def load_multimodal_components( | |
|
|
||
| print(f"Successfully loaded {len(multimodal_state_dict)} multimodal tensors") | ||
| return multimodal_state_dict | ||
|
|
||
|
|
||
| _TOKENIZER_FILES = ( | ||
| "tokenizer.json", | ||
| "tokenizer_config.json", | ||
| "special_tokens_map.json", | ||
| "chat_template.jinja", | ||
| "added_tokens.json", | ||
| "tokenizer.model", | ||
| "vocab.json", | ||
| "merges.txt", | ||
| ) | ||
|
jenchen13 marked this conversation as resolved.
Outdated
|
||
|
|
||
|
|
||
| def copy_tokenizer_from_local_ckpt(src: str | os.PathLike, dst: str | os.PathLike): | ||
| """Copy tokenizer files verbatim from a local HF checkpoint dir. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major | ⚡ Quick win

Export the new public helper via module `__all__`.

💡 Proposed fix

```diff
+__all__ = [
+    "copy_hf_ckpt_remote_code",
+    "load_multimodal_components",
+    "copy_tokenizer_from_local_ckpt",
+]
```

🤖 Prompt for AI Agents |
||
|
|
||
| Preserves the pre-transformers-v5 PreTrainedTokenizer(Fast) layout (full | ||
| ``added_tokens_decoder`` in ``tokenizer_config.json`` + ``special_tokens_map.json``). | ||
|
|
||
| Args: | ||
| src: Source HF checkpoint directory. Must be a local path. | ||
| dst: Destination directory; created if missing. | ||
| """ | ||
| if not os.path.isdir(src): | ||
| raise ValueError(f"Invalid source path: {src}. It should be a directory.") | ||
| os.makedirs(dst, exist_ok=True) | ||
| for fn in _TOKENIZER_FILES: | ||
| p = os.path.join(src, fn) | ||
| if os.path.isfile(p): | ||
| shutil.copy2(p, dst) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
just a linter change