Commit

support partial_rotary_factor
irexyc committed Oct 25, 2024
1 parent a22b52d commit cc4cce7
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions lmdeploy/turbomind/deploy/source_model/llama.py
@@ -214,6 +214,7 @@ def model_info(self):
             model_arg.get('max_position_embeddings', 0))
         rope_scaling = model_arg.get('rope_scaling', None)
         scaling_factor = 0.0
+        rotary_embedding = hidden_units // attn_head_num
         use_dynamic_ntk = 0
         scaling_type = ''
         low_freq_factor = 1.0
@@ -231,6 +232,11 @@ def model_info(self):
             scaling_type = llama2_scaling_type if llama2_scaling_type \
                 else llama3_scaling_type
             scaling_factor = rope_scaling.get('factor', 0.0)
+            if 'partial_rotary_factor' in rope_scaling:
+                partial_rotary_factor = float(
+                    rope_scaling.get('partial_rotary_factor'))
+                rotary_embedding = int(
+                    rotary_embedding * partial_rotary_factor)
             if scaling_type == 'dynamic':
                 use_dynamic_ntk = 1
             elif scaling_type == 'llama3':
@@ -260,6 +266,7 @@ def model_info(self):
             use_dynamic_ntk=use_dynamic_ntk,
             rope_scaling_type=scaling_type,
             rope_scaling_factor=scaling_factor,
+            rotary_embedding=rotary_embedding,
             low_freq_factor=low_freq_factor,
             high_freq_factor=high_freq_factor,
             attention_factor=attention_factor,
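
For reference, a minimal sketch of what the change computes, assuming a Hugging Face-style config whose rope_scaling dict carries partial_rotary_factor (the concrete values below are hypothetical examples, not taken from any particular model):

    # Sketch of the rotary dimension computation added in this commit.
    hidden_units = 4096
    attn_head_num = 32
    rope_scaling = {'rope_type': 'dynamic', 'factor': 2.0,
                    'partial_rotary_factor': 0.5}

    # Default: the full per-head dimension is rotated.
    rotary_embedding = hidden_units // attn_head_num  # 128
    if 'partial_rotary_factor' in rope_scaling:
        partial_rotary_factor = float(rope_scaling.get('partial_rotary_factor'))
        # Only this fraction of each head's dimension receives rotary
        # position embedding; the rest is left unrotated.
        rotary_embedding = int(rotary_embedding * partial_rotary_factor)
    print(rotary_embedding)  # 64, forwarded to turbomind as rotary_embedding
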
