@@ -1570,7 +1570,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     default: type = LLM_TYPE_UNKNOWN;
                 }
             } break;
-        case LLM_ARCH_NEMOTRONH:
+        case LLM_ARCH_NEMOTRON_H:
             {
                 ml.get_key(LLM_KV_SSM_CONV_KERNEL, hparams.ssm_d_conv);
                 ml.get_key(LLM_KV_SSM_INNER_SIZE, hparams.ssm_d_inner);
@@ -4709,7 +4709,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                         layer.ffn_up_b = create_tensor(tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff}, TENSOR_NOT_REQUIRED);
                     }
                 } break;
-            case LLM_ARCH_NEMOTRONH:
+            case LLM_ARCH_NEMOTRON_H:
                 {
                     // mamba2 Mixer SSM params
                     // NOTE: int64_t for tensor dimensions
@@ -5953,7 +5953,7 @@ void llama_model::print_info() const {
         arch == LLM_ARCH_FALCON_H1 ||
         arch == LLM_ARCH_PLAMO2 ||
         arch == LLM_ARCH_GRANITE_HYBRID ||
-        arch == LLM_ARCH_NEMOTRONH) {
+        arch == LLM_ARCH_NEMOTRON_H) {
         LLAMA_LOG_INFO("%s: ssm_d_conv = %u\n", __func__, hparams.ssm_d_conv);
         LLAMA_LOG_INFO("%s: ssm_d_inner = %u\n", __func__, hparams.ssm_d_inner);
         LLAMA_LOG_INFO("%s: ssm_d_state = %u\n", __func__, hparams.ssm_d_state);
@@ -14220,8 +14220,8 @@ struct llm_build_nemotron : public llm_graph_context {
     }
 };
 
-struct llm_build_nemotronh : public llm_graph_context_mamba {
-    llm_build_nemotronh(
+struct llm_build_nemotron_h : public llm_graph_context_mamba {
+    llm_build_nemotron_h(
         const llama_model & model,
         const llm_graph_params & params) :
         llm_graph_context_mamba(params) {
@@ -18508,7 +18508,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
             if (arch == LLM_ARCH_FALCON_H1) {
                 filter_attn = [&](int32_t) { return true; };
                 filter_recr = [&](int32_t) { return true; };
-            } else if (arch == LLM_ARCH_NEMOTRONH) {
+            } else if (arch == LLM_ARCH_NEMOTRON_H) {
                 filter_attn = [&](int32_t il) {
                     return !hparams.is_recurrent(il) && hparams.n_ff(il) == 0;
                 };
@@ -18865,9 +18865,9 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
             {
                 llm = std::make_unique<llm_build_nemotron>(*this, params);
             } break;
-        case LLM_ARCH_NEMOTRONH:
+        case LLM_ARCH_NEMOTRON_H:
             {
-                llm = std::make_unique<llm_build_nemotronh>(*this, params);
+                llm = std::make_unique<llm_build_nemotron_h>(*this, params);
             } break;
         case LLM_ARCH_EXAONE:
             {
@@ -19104,7 +19104,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
         case LLM_ARCH_RWKV7:
         case LLM_ARCH_ARWKV7:
         case LLM_ARCH_WAVTOKENIZER_DEC:
-        case LLM_ARCH_NEMOTRONH:
+        case LLM_ARCH_NEMOTRON_H:
             return LLAMA_ROPE_TYPE_NONE;

         // use what we call a normal RoPE, operating on pairs of consecutive head values