Commit f2165dd

Rename nemotronh to nemotron_h for consistency
- Update architecture name from NEMOTRONH to NEMOTRON_H in constants.py
- Change architecture string from 'nemotronh' to 'nemotron_h' in all files
- Update enum LLM_ARCH_NEMOTRONH to LLM_ARCH_NEMOTRON_H
- Update class name llm_build_nemotronh to llm_build_nemotron_h
- Consistent naming with underscore convention (nemotron_h vs nemotronh)
1 parent 7503535 commit f2165dd
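Since the enum member itself is renamed, the Python side of the change can be sanity-checked directly. A minimal sketch, assuming the in-tree gguf-py package is importable (e.g. after `pip install -e gguf-py`):

# Minimal sanity check for the rename on the Python side.
# Assumes the in-tree gguf-py package is on the import path.
import gguf

arch = gguf.MODEL_ARCH.NEMOTRON_H
print(arch.name)  # NEMOTRON_H

# The old member should be gone after this commit.
assert not hasattr(gguf.MODEL_ARCH, "NEMOTRONH")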

File tree

5 files changed: 17 additions & 17 deletions


convert_hf_to_gguf.py

Lines changed: 1 addition & 1 deletion
@@ -7648,7 +7648,7 @@ def set_vocab(self):
 @ModelBase.register("NemotronHForCausalLM")
 class NemotronHModel(GraniteHybridModel):
     """Hybrid mamba2/attention model from NVIDIA"""
-    model_arch = gguf.MODEL_ARCH.NEMOTRONH
+    model_arch = gguf.MODEL_ARCH.NEMOTRON_H

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
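A quick way to confirm the converter now targets the renamed constant, sketched under the assumption that convert_hf_to_gguf.py can be imported as a module from the repo root (its CLI entry point is expected to be guarded, so importing should have no side effects beyond loading its dependencies):

# Hedged sketch: NemotronHModel should map to the renamed architecture member.
# Assumes convert_hf_to_gguf.py and gguf-py are importable from the repo root.
import gguf
import convert_hf_to_gguf as conv

assert conv.NemotronHModel.model_arch == gguf.MODEL_ARCH.NEMOTRON_H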

gguf-py/gguf/constants.py

Lines changed: 3 additions & 3 deletions
@@ -367,7 +367,7 @@ class MODEL_ARCH(IntEnum):
     T5ENCODER = auto()
     JAIS = auto()
     NEMOTRON = auto()
-    NEMOTRONH = auto()
+    NEMOTRON_H = auto()
     EXAONE = auto()
     EXAONE4 = auto()
     GRANITE = auto()
@@ -701,7 +701,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.T5ENCODER: "t5encoder",
     MODEL_ARCH.JAIS: "jais",
     MODEL_ARCH.NEMOTRON: "nemotron",
-    MODEL_ARCH.NEMOTRONH: "nemotronh",
+    MODEL_ARCH.NEMOTRON_H: "nemotron_h",
     MODEL_ARCH.EXAONE: "exaone",
     MODEL_ARCH.EXAONE4: "exaone4",
     MODEL_ARCH.GRANITE: "granite",
@@ -2299,7 +2299,7 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
-    MODEL_ARCH.NEMOTRONH: [
+    MODEL_ARCH.NEMOTRON_H: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.OUTPUT,
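The second hunk updates the enum-to-string mapping that determines the architecture name serialized into GGUF metadata. A small check, assuming the dict shown there is the package-level gguf.MODEL_ARCH_NAMES mapping (its name lies outside the hunk, so that is an assumption):

# Assumption: the mapping edited in the second hunk is gguf.MODEL_ARCH_NAMES,
# i.e. the dict that turns MODEL_ARCH members into architecture strings.
import gguf

assert gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.NEMOTRON_H] == "nemotron_h"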

src/llama-arch.cpp

Lines changed: 3 additions & 3 deletions
@@ -69,7 +69,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_T5ENCODER,  "t5encoder"  },
     { LLM_ARCH_JAIS,       "jais"       },
     { LLM_ARCH_NEMOTRON,   "nemotron"   },
-    { LLM_ARCH_NEMOTRONH,  "nemotronh"  },
+    { LLM_ARCH_NEMOTRON_H, "nemotron_h" },
     { LLM_ARCH_EXAONE,     "exaone"     },
     { LLM_ARCH_EXAONE4,    "exaone4"    },
     { LLM_ARCH_RWKV6,      "rwkv6"      },
@@ -1552,7 +1552,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
         },
     },
     {
-        LLM_ARCH_NEMOTRONH,
+        LLM_ARCH_NEMOTRON_H,
         {
             { LLM_TENSOR_TOKEN_EMBD,  "token_embd"  },
             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
@@ -2381,7 +2381,7 @@ bool llm_arch_is_hybrid(const llm_arch & arch) {
         case LLM_ARCH_PLAMO2:
         case LLM_ARCH_GRANITE_HYBRID:
         case LLM_ARCH_LFM2:
-        case LLM_ARCH_NEMOTRONH:
+        case LLM_ARCH_NEMOTRON_H:
             return true;
         default:
             return false;

src/llama-arch.h

Lines changed: 1 addition & 1 deletion
@@ -73,7 +73,7 @@ enum llm_arch {
     LLM_ARCH_T5ENCODER,
     LLM_ARCH_JAIS,
     LLM_ARCH_NEMOTRON,
-    LLM_ARCH_NEMOTRONH,
+    LLM_ARCH_NEMOTRON_H,
     LLM_ARCH_EXAONE,
     LLM_ARCH_EXAONE4,
     LLM_ARCH_RWKV6,

src/llama-model.cpp

Lines changed: 9 additions & 9 deletions
@@ -1570,7 +1570,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     default: type = LLM_TYPE_UNKNOWN;
                 }
             } break;
-        case LLM_ARCH_NEMOTRONH:
+        case LLM_ARCH_NEMOTRON_H:
             {
                 ml.get_key(LLM_KV_SSM_CONV_KERNEL, hparams.ssm_d_conv);
                 ml.get_key(LLM_KV_SSM_INNER_SIZE, hparams.ssm_d_inner);
@@ -4709,7 +4709,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.ffn_up_b = create_tensor(tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff}, TENSOR_NOT_REQUIRED);
                 }
             } break;
-        case LLM_ARCH_NEMOTRONH:
+        case LLM_ARCH_NEMOTRON_H:
             {
                 // mamba2 Mixer SSM params
                 // NOTE: int64_t for tensor dimensions
@@ -5953,7 +5953,7 @@ void llama_model::print_info() const {
         arch == LLM_ARCH_FALCON_H1 ||
         arch == LLM_ARCH_PLAMO2 ||
         arch == LLM_ARCH_GRANITE_HYBRID ||
-        arch == LLM_ARCH_NEMOTRONH) {
+        arch == LLM_ARCH_NEMOTRON_H) {
         LLAMA_LOG_INFO("%s: ssm_d_conv  = %u\n", __func__, hparams.ssm_d_conv);
         LLAMA_LOG_INFO("%s: ssm_d_inner = %u\n", __func__, hparams.ssm_d_inner);
         LLAMA_LOG_INFO("%s: ssm_d_state = %u\n", __func__, hparams.ssm_d_state);
@@ -14220,8 +14220,8 @@ struct llm_build_nemotron : public llm_graph_context {
     }
 };

-struct llm_build_nemotronh : public llm_graph_context_mamba {
-    llm_build_nemotronh(
+struct llm_build_nemotron_h : public llm_graph_context_mamba {
+    llm_build_nemotron_h(
         const llama_model & model,
         const llm_graph_params & params) :
     llm_graph_context_mamba(params) {
@@ -18508,7 +18508,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
         if (arch == LLM_ARCH_FALCON_H1) {
             filter_attn = [&](int32_t) { return true; };
             filter_recr = [&](int32_t) { return true; };
-        } else if (arch == LLM_ARCH_NEMOTRONH) {
+        } else if (arch == LLM_ARCH_NEMOTRON_H) {
             filter_attn = [&](int32_t il) {
                 return !hparams.is_recurrent(il) && hparams.n_ff(il) == 0;
             };
@@ -18865,9 +18865,9 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
             {
                 llm = std::make_unique<llm_build_nemotron>(*this, params);
             } break;
-        case LLM_ARCH_NEMOTRONH:
+        case LLM_ARCH_NEMOTRON_H:
             {
-                llm = std::make_unique<llm_build_nemotronh>(*this, params);
+                llm = std::make_unique<llm_build_nemotron_h>(*this, params);
             } break;
         case LLM_ARCH_EXAONE:
             {
@@ -19104,7 +19104,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
         case LLM_ARCH_RWKV7:
         case LLM_ARCH_ARWKV7:
         case LLM_ARCH_WAVTOKENIZER_DEC:
-        case LLM_ARCH_NEMOTRONH:
+        case LLM_ARCH_NEMOTRON_H:
             return LLAMA_ROPE_TYPE_NONE;

         // use what we call a normal RoPE, operating on pairs of consecutive head values
