@@ -947,6 +947,7 @@ def convert_hf_model_config(model_name: str, **kwargs):
947947 "NTK_by_parts_low_freq_factor" : 1.0 ,
948948 "NTK_by_parts_high_freq_factor" : 4.0 ,
949949 "NTK_by_parts_factor" : 32.0 ,
950+ "NTK_original_ctx_len" : 8192 ,
950951 }
951952 elif "Llama-3.2-3B" in official_model_name :
952953 cfg_dict = {
@@ -971,6 +972,7 @@ def convert_hf_model_config(model_name: str, **kwargs):
971972 "NTK_by_parts_low_freq_factor" : 1.0 ,
972973 "NTK_by_parts_high_freq_factor" : 4.0 ,
973974 "NTK_by_parts_factor" : 32.0 ,
975+ "NTK_original_ctx_len" : 8192 ,
974976 }
975977 elif "Llama-3.3-70B" in official_model_name :
976978 cfg_dict = {
@@ -995,6 +997,7 @@ def convert_hf_model_config(model_name: str, **kwargs):
995997 "NTK_by_parts_low_freq_factor" : 1.0 ,
996998 "NTK_by_parts_high_freq_factor" : 4.0 ,
997999 "NTK_by_parts_factor" : 8.0 ,
1000+ "NTK_original_ctx_len" : 8192 ,
9981001 }
9991002 elif "Llama-3.1-8B" in official_model_name :
10001003 cfg_dict = {
@@ -1019,6 +1022,7 @@ def convert_hf_model_config(model_name: str, **kwargs):
10191022 "NTK_by_parts_low_freq_factor" : 1.0 ,
10201023 "NTK_by_parts_high_freq_factor" : 4.0 ,
10211024 "NTK_by_parts_factor" : 8.0 ,
1025+ "NTK_original_ctx_len" : 8192 ,
10221026 }
10231027 elif "Llama-3.1-70B" in official_model_name :
10241028 cfg_dict = {
@@ -1043,6 +1047,7 @@ def convert_hf_model_config(model_name: str, **kwargs):
10431047 "NTK_by_parts_low_freq_factor" : 1.0 ,
10441048 "NTK_by_parts_high_freq_factor" : 4.0 ,
10451049 "NTK_by_parts_factor" : 8.0 ,
1050+ "NTK_original_ctx_len" : 8192 ,
10461051 }
10471052 elif architecture == "GPTNeoForCausalLM" :
10481053 cfg_dict = {
0 commit comments