From d6d905b24270e97e489ac14f6a416cbe710bf1e3 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov
Date: Mon, 6 Nov 2023 13:01:00 +0200
Subject: [PATCH 1/2] convert: Fix detection of LLAMA2

In recent downloads of the LLAMA2 dataset, norm_eps is set to 1e-06.
This leads to convert.py erroneously considering the model to be LLAMA1
and setting the context to 2k tokens. Fix it by extending the existing
hack to also check for the 1e-06 value.
---
 convert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert.py b/convert.py
index 9110f15806c6b..d88dd096d71fa 100755
--- a/convert.py
+++ b/convert.py
@@ -250,7 +250,7 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
         if config.get("rope_theta") == 1000000:
             # CodeLlama
             n_ctx = 16384
-        elif config["norm_eps"] == 1e-05:
+        elif config["norm_eps"] in (1e-05, 1e-06):
             # LLaMA v2
             n_ctx = 4096
         else:

From f36a777bbc20ccf1e61d3d19777ecf57461c8dd7 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov
Date: Mon, 6 Nov 2023 13:03:31 +0200
Subject: [PATCH 2/2] convert: Fix handling of LLAMA2 vocab_size = -1

When vocab_size is detected to be -1, simply remove its value from the
parsed params.json and fall back to using the tok_embeddings.weight.

Fixes #3900
---
 convert.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/convert.py b/convert.py
index d88dd096d71fa..1c30df6569b02 100755
--- a/convert.py
+++ b/convert.py
@@ -253,6 +253,11 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
         elif config["norm_eps"] in (1e-05, 1e-06):
             # LLaMA v2
             n_ctx = 4096
+            # For some reason FB writes -1 to vocab size for their LLAMA2 models
+            # simply remove this bogus value and let the return statement below
+            # figure it out
+            if config["vocab_size"] == -1:
+                del config["vocab_size"]
         else:
             # LLaMA v1
             n_ctx = 2048
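
Below is a minimal sketch, separate from the patches above, of how the fallback described in PATCH 2/2 is expected to behave once the bogus -1 entry is dropped: the vocabulary size then comes from the row count of tok_embeddings.weight. guess_n_vocab, its parameters, and the example values are illustrative assumptions, not code from convert.py.

import numpy as np

def guess_n_vocab(config: dict, tok_embeddings: np.ndarray) -> int:
    # Some FB LLAMA2 downloads ship params.json with "vocab_size": -1;
    # treat that bogus value as if the key were absent (mirrors PATCH 2/2).
    if config.get("vocab_size", -1) == -1:
        config = {k: v for k, v in config.items() if k != "vocab_size"}
    # Fall back to the first dimension of tok_embeddings.weight: one row
    # of the embedding matrix per vocabulary entry.
    return config.get("vocab_size", tok_embeddings.shape[0])

# Example: a LLAMA2-style params.json with vocab_size = -1 and a
# 32000 x 4096 embedding matrix resolves to n_vocab == 32000.
params = {"dim": 4096, "n_heads": 32, "norm_eps": 1e-06, "vocab_size": -1}
emb = np.zeros((32000, 4096), dtype=np.float16)
assert guess_n_vocab(params, emb) == 32000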