Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 40 additions & 2 deletions nemo/tron/converter/qwen.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

if TYPE_CHECKING:
from transformers import Qwen2Config as HFQwen2Config
from transformers import Qwen3Config as HFQwen3Config

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -105,6 +106,44 @@ def tron_config(self) -> Qwen2Config:
return self._tron_config


class HFQwen3Exporter(BaseExporter):
    """Exporter to convert NeMo Qwen3 models to Hugging Face format."""

    # Borrow the state conversion from _NeMo2HFQwen3Exporter instead of
    # redefining it on this class.
    convert_state = _NeMo2HFQwen3Exporter.convert_state

    @property
    def hf_config(self) -> "HFQwen3Config":
        """Generate a Hugging Face Qwen3 configuration from the NeMo model configuration.

        The configuration is built from ``self.tron_config`` on first access and
        cached on ``self._hf_config``; later accesses return the cached object.
        (``self._hf_config`` is presumably initialised to ``None`` by the base
        class -- confirm against ``BaseExporter``.)

        Returns:
            HFQwen3Config: A Hugging Face Qwen3 configuration
        """
        if self._hf_config is not None:
            return self._hf_config

        # Imported lazily so that importing this module does not require a
        # transformers release that ships Qwen3.
        from transformers import Qwen3Config as HFQwen3Config

        source = self.tron_config

        # Resolve the vocabulary size lazily: a getattr() default argument is
        # evaluated eagerly, which would touch ``self.tokenizer`` even when the
        # model configuration already carries ``vocab_size``.
        if hasattr(source, "vocab_size"):
            vocab_size = source.vocab_size
        else:
            vocab_size = self.tokenizer.vocab_size

        hf_kwargs = {
            "num_hidden_layers": source.num_layers,
            "hidden_size": source.hidden_size,
            "intermediate_size": source.ffn_hidden_size,
            "num_attention_heads": source.num_attention_heads,
            "max_position_embeddings": source.max_position_embeddings,
            "initializer_range": source.init_method_std,
            "rms_norm_eps": source.layernorm_epsilon,
            "num_key_value_heads": source.num_query_groups,
            "rope_theta": source.rotary_base,
            "vocab_size": vocab_size,
            "sliding_window": source.seq_length,
            "tie_word_embeddings": source.share_embeddings_and_output_weights,
        }

        # Hugging Face's Qwen3Config takes the per-head width as an explicit
        # ``head_dim`` argument. Forward it when the source configuration
        # exposes ``kv_channels`` so the exported config does not silently fall
        # back to the Hugging Face default.
        # NOTE(review): assumes ``kv_channels`` is the per-head dimension on the
        # tron config (as in Megatron's TransformerConfig) -- confirm.
        head_dim = getattr(source, "kv_channels", None)
        if head_dim is not None:
            hf_kwargs["head_dim"] = head_dim

        self._hf_config = HFQwen3Config(**hf_kwargs)
        return self._hf_config


class HFQwen3Importer(BaseImporter):
"""Importer for converting Hugging Face Qwen3 models to NeMo Tron format."""

Expand Down Expand Up @@ -132,15 +171,14 @@ def tron_config(self) -> Qwen3Config:
configuration.

Returns:
Qwen3Config: NeMo configuration for Qwen2 models
Qwen3Config: NeMo configuration for Qwen3 models
"""
if self._tron_config is not None:
return self._tron_config

self._tron_config = _NeMo2HFQwen3Importer.config.fget(self.input_path)
return self._tron_config


@property
def config(self) -> Qwen3Config:
    """Return the NeMo Tron configuration; an alias for ``tron_config``."""
    tron_cfg = self.tron_config
    return tron_cfg
Loading