@@ -410,7 +410,7 @@ def export_onnx_models(
precision == Precision.FLOAT16,
model.config.encoder_attention_heads,
model.config.d_model,
- model.config.num_hidden_layers,
+ model.config.decoder_layers,
use_external_data_format,
use_gpu=use_gpu,
provider=provider,
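Note on this change: the decoder depth is what the export needs here, and it does not necessarily match whatever num_hidden_layers resolves to for Whisper (openai/whisper-large-v3-turbo, for instance, pairs a 32-layer encoder with a 4-layer decoder). A minimal sketch to compare the two depths, assuming the Hugging Face WhisperConfig and the turbo checkpoint name:

from transformers import WhisperConfig

# Sketch only: inspect the encoder and decoder depths that the export code distinguishes.
config = WhisperConfig.from_pretrained("openai/whisper-large-v3-turbo")
print(config.encoder_layers, config.decoder_layers)  # expected 32 and 4 for the turbo checkpoint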
@@ -1,5 +1,5 @@
torch>=2.7.0
- transformers>=4.52.3
+ transformers==4.52.3
openai-whisper==20240927
ffmpeg-python
datasets
@@ -187,7 +187,7 @@ def input_names(self):
*list(
chain.from_iterable(
(f"past_key_self_{i}", f"past_value_self_{i}", f"past_key_cross_{i}", f"past_value_cross_{i}")
- for i in range(self.config.num_hidden_layers)
+ for i in range(self.config.decoder_layers)
)
),
]
@@ -205,7 +205,7 @@ def output_names(self)
f"present_key_cross_{i}",
f"present_value_cross_{i}",
)
- for i in range(self.config.num_hidden_layers)
+ for i in range(self.config.decoder_layers)
)
),
]
@@ -214,8 +214,7 @@ def output_names(self):
"logits",
*list(
chain.from_iterable(
- (f"present_key_self_{i}", f"present_value_self_{i}")
- for i in range(self.config.num_hidden_layers)
+ (f"present_key_self_{i}", f"present_value_self_{i}") for i in range(self.config.decoder_layers)
)
),
]
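For reference, a standalone sketch of the naming pattern these properties generate, assuming a toy decoder_layers value of 2 (the format strings mirror the ones in the diff above):

from itertools import chain

decoder_layers = 2  # assumed toy value
past_input_names = list(
    chain.from_iterable(
        (f"past_key_self_{i}", f"past_value_self_{i}", f"past_key_cross_{i}", f"past_value_cross_{i}")
        for i in range(decoder_layers)
    )
)
# ['past_key_self_0', 'past_value_self_0', 'past_key_cross_0', 'past_value_cross_0',
#  'past_key_self_1', 'past_value_self_1', 'past_key_cross_1', 'past_value_cross_1']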
@@ -127,7 +127,7 @@ def output_names(self):
*list(
chain.from_iterable(
(f"present_key_cross_{i}", f"present_value_cross_{i}")
- for i in range(self.config.num_hidden_layers)
+ for i in range(self.config.decoder_layers)
)
),
]
@@ -143,7 +143,7 @@ def output_names(self):
f"present_key_cross_{i}",
f"present_value_cross_{i}",
)
- for i in range(self.config.num_hidden_layers)
+ for i in range(self.config.decoder_layers)
)
),
]
@@ -763,7 +763,7 @@ def optimize_onnx(
is_float16: bool,
num_attention_heads: int,
hidden_size: int,
- num_layers: int,
+ num_decoder_layers: int,
use_external_data_format: bool = False,
use_gpu: bool = False,
provider: str = "cpu",
@@ -801,7 +801,7 @@ def optimize_onnx(
m = add_cache_indirection_to_mha(m, past_seq_len_name)

if output_qk:
- m = add_output_qk_to_mha(m, skip_node_idxs=list(range(0, 2 * num_layers, 2)))
+ m = add_output_qk_to_mha(m, skip_node_idxs=list(range(0, 2 * num_decoder_layers, 2)))

m.save_model_to_file(optimized_model_path, use_external_data_format, all_tensors_to_one_file=True)

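A note on skip_node_idxs above: the even indices presumably target the self-attention MultiHeadAttention node in each decoder layer, under the assumption that the decoder's MHA nodes alternate self-attention and cross-attention, so QK outputs are only added to the cross-attention nodes. A sketch of the index pattern with an assumed num_decoder_layers of 4:

num_decoder_layers = 4  # assumed example value
skip_node_idxs = list(range(0, 2 * num_decoder_layers, 2))
# [0, 2, 4, 6] -> every other MHA node is skipped, leaving QK outputs on the remaining nodes,
# assuming self- and cross-attention nodes alternate per decoder layer.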
@@ -94,14 +94,14 @@ def get_sample_past_key_values(
torch.rand(batch_size, num_heads, past_seq_len, head_size, device=device, dtype=torch_dtype),
torch.rand(batch_size, num_heads, past_seq_len, head_size, device=device, dtype=torch_dtype),
)
- for _ in range(config.num_hidden_layers)
+ for _ in range(config.decoder_layers)
]
cross_attention_kv_caches = [
(
torch.rand(batch_size, num_heads, max_source_positions, head_size, device=device, dtype=torch_dtype),
torch.rand(batch_size, num_heads, max_source_positions, head_size, device=device, dtype=torch_dtype),
)
- for _ in range(config.num_hidden_layers)
+ for _ in range(config.decoder_layers)
]
return flatten_past_key_values(self_attention_kv_caches, cross_attention_kv_caches)

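As a sanity check on the flattened layout: each decoder layer contributes a self-attention key/value pair and a cross-attention key/value pair, so the flattened list should hold 4 * decoder_layers tensors. A small sketch with assumed sizes (the real values come from the config and the decoding state):

decoder_layers = 4  # e.g. a 4-layer decoder
tensors_per_layer = 2 + 2  # self-attention K/V plus cross-attention K/V
print(decoder_layers * tensors_per_layer)  # 16 flattened KV-cache tensors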
@@ -187,7 +187,7 @@ def get_sample_QKs( # noqa: N802
torch.rand(
batch_size, num_heads, sequence_length, config.max_source_positions, device=device, dtype=torch_dtype
)
- for _ in range(config.num_hidden_layers)
+ for _ in range(config.decoder_layers)
]
return QKs

@@ -156,7 +156,7 @@ def input_names(self):
"alignment_heads",
"sot_sequence_length",
"segment_length",
- *[f"cross_qk_{i}" for i in range(self.config.num_hidden_layers)],
+ *[f"cross_qk_{i}" for i in range(self.config.decoder_layers)],
]
return input_names
