@@ -114,7 +114,7 @@
 
 // bump if necessary
 #define LLAMA_MAX_NODES   8192
-#define LLAMA_MAX_LAYERS  256
+#define LLAMA_MAX_LAYERS  512
 #define LLAMA_MAX_EXPERTS 160  // DeepSeekV2
 
 //
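Note on the cap above: per-layer hyperparameters in the loader are held in fixed-size arrays bounded by this constant, so raising LLAMA_MAX_LAYERS from 256 to 512 is what allows deeper models to load. A minimal sketch of that pattern, with field names that are assumptions rather than the actual llama.cpp structs:

// Hedged sketch (not the actual llama.cpp structs): per-layer hyperparameters
// are stored in fixed-size arrays bounded by the compile-time cap, so raising
// LLAMA_MAX_LAYERS is what allows models with more than 256 layers to load.
#include <array>
#include <cstdint>

#define LLAMA_MAX_LAYERS 512

struct hparams_sketch {
    uint32_t n_layer = 0;                                   // actual depth of the loaded model
    std::array<uint32_t, LLAMA_MAX_LAYERS> n_head_arr = {}; // one entry per layer, capacity fixed at compile time
    std::array<uint32_t, LLAMA_MAX_LAYERS> n_ff_arr   = {};
};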
@@ -4007,7 +4007,9 @@ struct llama_model_loader {
                 throw std::runtime_error(format("%s is not a float32, int32 array", key.c_str()));
         }
 
-        GGML_ASSERT(arr_info.length <= N_MAX);
+        if (arr_info.length > N_MAX) {
+            throw std::runtime_error(format("array length %u for key %s exceeds max %u", (uint32_t) arr_info.length, key.c_str(), (uint32_t) N_MAX));
+        }
 
         std::copy((const T*)arr_info.data, (const T *)arr_info.data + arr_info.length, result.begin());
 
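The change above swaps a hard GGML_ASSERT for a thrown std::runtime_error, so an oversized array in a GGUF file surfaces as a recoverable load error rather than aborting the process. A minimal sketch of the calling pattern this enables follows; the function names and the error text are illustrative, not llama.cpp's actual call sites:

// Hedged sketch: because the loader now throws instead of asserting, a caller
// can catch the error, report it, and keep the process alive. load_gguf() is a
// hypothetical stand-in for the code path containing the check above.
#include <cstdio>
#include <stdexcept>

static void load_gguf() {
    // pretend the metadata check above failed
    throw std::runtime_error("array length 640 for key example.per_layer.key exceeds max 512");
}

static bool try_load() {
    try {
        load_gguf();
        return true;
    } catch (const std::runtime_error & err) {
        fprintf(stderr, "failed to load model: %s\n", err.what());
        return false;
    }
}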
@@ -4043,8 +4045,6 @@ struct llama_model_loader {
     // get array of n <= N_MAX elements, or a single element repeated n times
     template<typename T, size_t N_MAX>
     bool get_key_or_arr(const std::string & key, std::array<T, N_MAX> & result, uint32_t n, const bool required = true) {
-        GGML_ASSERT(n <= N_MAX);
-
         const int kid = gguf_find_key(meta, key.c_str());
 
         if (kid < 0) {
@@ -4054,6 +4054,10 @@ struct llama_model_loader {
             return false;
         }
 
+        if (n > N_MAX) {
+            throw std::runtime_error(format("n > N_MAX: %u > %u for key %s", (uint32_t) n, (uint32_t) N_MAX, key.c_str()));
+        }
+
         if (gguf_get_kv_type(meta, kid) == GGUF_TYPE_ARRAY) {
             struct GGUFMeta::ArrayInfo arr_info =
                 GGUFMeta::GKV<GGUFMeta::ArrayInfo>::get_kv(meta, kid);
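As its comment says, get_key_or_arr accepts either a full array of values or a single scalar that is repeated n times. A self-contained sketch of that broadcast semantics, including the same kind of bounds check the hunks above introduce; names here are illustrative, not the actual loader code:

// Hedged sketch of the "array of n values, or one value repeated n times"
// semantics; illustrative only, not the llama.cpp implementation.
// Assumes count >= 1 and that values points to at least count elements.
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <stdexcept>

template <size_t N_MAX>
void fill_or_broadcast(std::array<uint32_t, N_MAX> & result, const uint32_t * values, size_t count, uint32_t n) {
    if (count > N_MAX || n > N_MAX) {
        // mirrors the runtime checks introduced above: fail loudly, don't abort
        throw std::runtime_error("requested length exceeds N_MAX");
    }
    if (count > 1) {
        // the key held a full array: copy every entry
        std::copy(values, values + count, result.begin());
    } else {
        // the key held a single scalar: repeat it for the first n entries
        std::fill(result.begin(), result.begin() + n, values[0]);
    }
}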
@@ -19920,7 +19924,7 @@ size_t llama_state_get_size(const struct llama_context * ctx) {
     );
 
     // on session change it is very likely that the state size has changed - so we need to update this function
-    static_assert(LLAMA_SESSION_VERSION == 6 , "So you just bumped the session version - good. But did you remember to update llama_state_get_size?");
+    static_assert(LLAMA_SESSION_VERSION == 7 , "So you just bumped the session version - good. But did you remember to update llama_state_get_size?");
 
     return s_total;
 }
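The static_assert above is a guard pattern: it is pinned to the version constant, so bumping LLAMA_SESSION_VERSION (here 6 to 7) breaks the build at this spot until llama_state_get_size has been reviewed and the assert updated. A minimal sketch of the pattern with made-up names:

// Hedged sketch of the version-pinned static_assert guard; names are made up.
#include <cstddef>

#define MY_STATE_VERSION 7

size_t my_state_get_size() {
    size_t s_total = 0;
    // ... accumulate the serialized size of every state component here ...
    static_assert(MY_STATE_VERSION == 7,
        "state version changed - review this size calculation, then update the assert");
    return s_total;
}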