From 8d0d83b391ae573766f36bcc6d22a11d2ad91c78 Mon Sep 17 00:00:00 2001
From: Nigel Bosch
Date: Thu, 24 Aug 2023 15:09:22 -0500
Subject: [PATCH 1/3] Get rope scale from HF models

---
 convert.py | 38 ++++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/convert.py b/convert.py
index 10276bf630031..bc474ac7b47be 100755
--- a/convert.py
+++ b/convert.py
@@ -105,6 +105,7 @@ class Params:
     f_norm_eps: float
 
     f_rope_freq_base: Optional[float] = None
+    f_rope_scale: Optional[float] = None
 
     ftype: Optional[GGMLFileType] = None
 
@@ -160,13 +161,14 @@ def guessed(model: 'LazyModel') -> 'Params':
     def loadHFTransformerJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
         config = json.load(open(config_path))
 
-        n_vocab    = config["vocab_size"]
-        n_embd     = config["hidden_size"]
-        n_layer    = config["num_hidden_layers"]
-        n_ff       = config["intermediate_size"]
-        n_head     = config["num_attention_heads"]
-        n_head_kv  = config["num_key_value_heads"] if "num_key_value_heads" in config else n_head
-        f_norm_eps = config["rms_norm_eps"]
+        n_vocab      = config["vocab_size"]
+        n_embd       = config["hidden_size"]
+        n_layer      = config["num_hidden_layers"]
+        n_ff         = config["intermediate_size"]
+        n_head       = config["num_attention_heads"]
+        n_head_kv    = config["num_key_value_heads"] if "num_key_value_heads" in config else n_head
+        f_norm_eps   = config["rms_norm_eps"]
+        f_rope_scale = config["rope_scaling"]["factor"] if "rope_scaling" in config and "factor" in config["rope_scaling"] else None
 
         n_mult = Params.find_n_mult(n_ff, n_embd)
 
@@ -179,15 +181,16 @@ def loadHFTransformerJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
                             "Suggestion: provide 'config.json' of the model in the same directory containing model files.")
 
         return Params(
-            n_vocab    = n_vocab,
-            n_embd     = n_embd,
-            n_mult     = n_mult,
-            n_layer    = n_layer,
-            n_ctx      = n_ctx,
-            n_ff       = n_ff,
-            n_head     = n_head,
-            n_head_kv  = n_head_kv,
-            f_norm_eps = f_norm_eps,
+            n_vocab      = n_vocab,
+            n_embd       = n_embd,
+            n_mult       = n_mult,
+            n_layer      = n_layer,
+            n_ctx        = n_ctx,
+            n_ff         = n_ff,
+            n_head       = n_head,
+            n_head_kv    = n_head_kv,
+            f_norm_eps   = f_norm_eps,
+            f_rope_scale = f_rope_scale,
         )
 
     # LLaMA v2 70B params.json
@@ -771,6 +774,9 @@ def add_meta_arch(self, params: Params) -> None:
         if params.f_rope_freq_base:
             self.gguf.add_rope_freq_base(params.f_rope_freq_base)
 
+        if params.f_rope_scale:
+            self.gguf.add_rope_scale_linear(params.f_rope_scale)
+
         if params.ftype:
             self.gguf.add_file_type(params.ftype)
 

From 8ac33ce0fffcbdf4a2acb382e89fe238e3c5825a Mon Sep 17 00:00:00 2001
From: Nigel Bosch
Date: Thu, 24 Aug 2023 16:25:06 -0500
Subject: [PATCH 2/3] Save rope scale only for linear scaling

---
 convert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert.py b/convert.py
index bc474ac7b47be..ee8a671a109a2 100755
--- a/convert.py
+++ b/convert.py
@@ -168,7 +168,7 @@ def loadHFTransformerJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
         n_head       = config["num_attention_heads"]
         n_head_kv    = config["num_key_value_heads"] if "num_key_value_heads" in config else n_head
         f_norm_eps   = config["rms_norm_eps"]
-        f_rope_scale = config["rope_scaling"]["factor"] if "rope_scaling" in config and "factor" in config["rope_scaling"] else None
+        f_rope_scale = config.get("rope_scaling", {}).get("factor", None) if config.get("rope_scaling", {}).get("type", "") == "linear" else None
 
         n_mult = Params.find_n_mult(n_ff, n_embd)
 

From aa896e790b2f104b826ef9e60a40b741f94a898b Mon Sep 17 00:00:00 2001
From: Nigel Bosch
Date: Thu, 24 Aug 2023 17:22:53 -0500
Subject: [PATCH 3/3] Rewrite for clarity

---
 convert.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/convert.py b/convert.py
index ee8a671a109a2..91e68ff188e51 100755
--- a/convert.py
+++ b/convert.py
@@ -168,7 +168,11 @@ def loadHFTransformerJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
         n_head       = config["num_attention_heads"]
         n_head_kv    = config["num_key_value_heads"] if "num_key_value_heads" in config else n_head
         f_norm_eps   = config["rms_norm_eps"]
-        f_rope_scale = config.get("rope_scaling", {}).get("factor", None) if config.get("rope_scaling", {}).get("type", "") == "linear" else None
+
+        if "rope_scaling" in config and config["rope_scaling"].get("type") == "linear":
+            f_rope_scale = config["rope_scaling"].get("factor")
+        else:
+            f_rope_scale = None
 
         n_mult = Params.find_n_mult(n_ff, n_embd)
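
For reference, a minimal sketch of how the rope_scaling handling from [PATCH 3/3] behaves on a Hugging Face config.json, assuming a standalone helper for illustration. The sample_config values and the rope_scale_from_config name below are illustrative only and are not part of convert.py.

    import json

    # Illustrative config resembling an HF config.json; values are made up.
    sample_config = json.loads("""
    {
        "rms_norm_eps": 1e-05,
        "rope_scaling": {"type": "linear", "factor": 4.0}
    }
    """)

    def rope_scale_from_config(config: dict):
        # Only linear scaling maps onto the GGUF linear rope-scale field;
        # other types (or a missing "rope_scaling" block) yield None.
        if "rope_scaling" in config and config["rope_scaling"].get("type") == "linear":
            return config["rope_scaling"].get("factor")
        return None

    print(rope_scale_from_config(sample_config))  # 4.0
    print(rope_scale_from_config({}))             # None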