Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
3c26375
enable loading falcon-180b ckpt in .safetensors format
schoi-habana Jan 31, 2024
5357858
Address comments borrowing transformer's way of reading ckpt file
schoi-habana Feb 1, 2024
2c0799c
address comments
schoi-habana Feb 5, 2024
fbf1bd2
reformatted
schoi-habana Feb 8, 2024
e5e9234
enable loading falcon-180b ckpt in .safetensors format
schoi-habana Jan 31, 2024
190e29c
Address comments borrowing transformer's way of reading ckpt file
schoi-habana Feb 1, 2024
34fdc00
address comments
schoi-habana Feb 5, 2024
e40e9cc
Merge branch 'main' into dev/schoi/falcon_180b_loading_ckpt
schoi-habana Mar 7, 2024
f5c3029
Update ckpt loading
schoi-habana Feb 14, 2024
0064711
import modeling_utils from transformers
schoi-habana Feb 14, 2024
d90a8e9
enable Falcon FP8 inference
Mar 6, 2024
ffc8f4c
added example command in readme, code cleanup
Mar 7, 2024
69073f0
resolve issues in finetuning
Mar 8, 2024
3ef3e91
enable non reuse cache flow for fp8
Mar 11, 2024
ee8a90a
revert non reuse_cache flow for training due to perf drop
Mar 13, 2024
d90366a
add falcon180B FP8 test
Mar 13, 2024
b1ba7bd
Merge branch 'main' into schoi/falcon_180_quant_OH
schoi-habana Mar 18, 2024
68f6359
fix run_lm_eval.py to save --reuse_cache
schoi-habana Mar 16, 2024
8d045ff
modify comments
schoi-habana Mar 19, 2024
ad9d6e8
Merge branch 'main' into schoi/falcon_180_quant_OH
schoi-habana Mar 21, 2024
2ac8038
add falcon180b FP8 test (#104)
schoi-habana Mar 19, 2024
fe92094
fix Falcon view+inplace error
schoi-habana Mar 20, 2024
f882a9e
Add Llama7b FSDP test for torch.compile mode
pankd Mar 19, 2024
ff87e83
Update ckpt loading
yeonsily Mar 22, 2024
b46132c
Enable Falcon FP8 inference
yeonsily Mar 22, 2024
12064e3
Clean-up BERT-BASE FSDP test
Feb 28, 2024
7a409fa
enable hpu_graph support for wav2vec2-asr (#59)
Feb 29, 2024
f10da08
Run custom ctc_loss only for Gaudi2 (#95)
Mar 7, 2024
6739c09
Update test baseline
Mar 9, 2024
8a59700
Enable Llama2 70B to run with hqt on single card (#50)
libinta Mar 23, 2024
bc0993f
Cherry pick llama fp8 - enable non reuse cache flow for fp8 (#64)
libinta Mar 23, 2024
172406d
Fix merge for PR766
libinta Mar 23, 2024
799a1b7
Fix falcon reuse_cache issue.
libinta Mar 23, 2024
1deba9b
Fix text-generation fp8 test env issue.
libinta Mar 23, 2024
460f923
fix fp8 key error
skaulintel Mar 25, 2024
5496c6b
disable fsdp tests for gaudi1
skaulintel Mar 25, 2024
ea3cd51
Merge branch 'synapse_1.15' into synapse_1.15_addition
regisss Mar 27, 2024
ae98c19
Make style
regisss Mar 27, 2024
f168ddb
Remove ctc_loss_fwd in modeling_wav2vec2
regisss Mar 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 57 additions & 57 deletions optimum/habana/transformers/models/wav2vec2/modeling_wav2vec2.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,63 @@ def _gaudi_wav2vec2_sample_negative_indices(
return sampled_negative_indices


def gaudi_wav2vec2_forward(
    self,
    input_values: Optional[torch.Tensor],
    attention_mask: Optional[torch.Tensor] = None,
    mask_time_indices: Optional[torch.FloatTensor] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
) -> Union[Tuple, Wav2Vec2BaseModelOutput]:
    """
    Copied from Transformers: https://github.com/huggingface/transformers/blob/bd469c40659ce76c81f69c7726759d249b4aef49/src/transformers/models/wav2vec2/modeling_wav2vec2.py#L1282
    The only difference is that a clone of `hidden_states` is given to _mask_hidden_states to avoid an error.

    Pipeline: feature extractor -> feature projection -> masking -> encoder -> optional adapter.
    Any of `output_attentions`, `output_hidden_states` and `return_dict` left as `None` falls back
    to the corresponding value on `self.config`.

    Returns a `Wav2Vec2BaseModelOutput` when `return_dict` is truthy, otherwise the tuple
    `(hidden_states, extract_features)` followed by the remaining encoder outputs.
    """
    # Resolve unset flags from the model configuration.
    if output_attentions is None:
        output_attentions = self.config.output_attentions
    if output_hidden_states is None:
        output_hidden_states = self.config.output_hidden_states
    if return_dict is None:
        return_dict = self.config.use_return_dict

    extract_features = self.feature_extractor(input_values).transpose(1, 2)

    if attention_mask is not None:
        # compute reduced attention_mask corresponding to feature vectors
        num_feature_vectors = extract_features.shape[1]
        attention_mask = self._get_feature_vector_attention_mask(
            num_feature_vectors, attention_mask, add_adapter=False
        )

    hidden_states, extract_features = self.feature_projection(extract_features)

    # Masking is applied to a clone rather than to the projection output itself
    # (this is the one deviation from the upstream implementation).
    projected_copy = hidden_states.clone()
    hidden_states = self._mask_hidden_states(
        projected_copy, mask_time_indices=mask_time_indices, attention_mask=attention_mask
    )

    encoded = self.encoder(
        hidden_states,
        attention_mask=attention_mask,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )
    hidden_states = encoded[0]

    if self.adapter is not None:
        hidden_states = self.adapter(hidden_states)

    if return_dict:
        return Wav2Vec2BaseModelOutput(
            last_hidden_state=hidden_states,
            extract_features=extract_features,
            hidden_states=encoded.hidden_states,
            attentions=encoded.attentions,
        )

    return (hidden_states, extract_features) + encoded[1:]


def _gaudi_wav2vec2_mask_hidden_states(
self,
hidden_states: torch.FloatTensor,
Expand Down Expand Up @@ -318,63 +375,6 @@ def gaudi_wav2vec2_encoder_forward(
)


def gaudi_wav2vec2_forward(
    self,
    input_values: Optional[torch.Tensor],
    attention_mask: Optional[torch.Tensor] = None,
    mask_time_indices: Optional[torch.FloatTensor] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
) -> Union[Tuple, Wav2Vec2BaseModelOutput]:
    """
    Copied from Transformers: https://github.com/huggingface/transformers/blob/bd469c40659ce76c81f69c7726759d249b4aef49/src/transformers/models/wav2vec2/modeling_wav2vec2.py#L1282
    The only difference is that a clone of `hidden_states` is given to _mask_hidden_states to avoid an error.

    Pipeline: feature extractor -> feature projection -> masking -> encoder -> optional adapter.
    Any of `output_attentions`, `output_hidden_states` and `return_dict` left as `None` falls back
    to the corresponding value on `self.config`.

    Returns a `Wav2Vec2BaseModelOutput` when `return_dict` is truthy, otherwise the tuple
    `(hidden_states, extract_features)` followed by the remaining encoder outputs.
    """
    # Resolve unset flags from the model configuration.
    if output_attentions is None:
        output_attentions = self.config.output_attentions
    if output_hidden_states is None:
        output_hidden_states = self.config.output_hidden_states
    if return_dict is None:
        return_dict = self.config.use_return_dict

    extract_features = self.feature_extractor(input_values).transpose(1, 2)

    if attention_mask is not None:
        # compute reduced attention_mask corresponding to feature vectors
        num_feature_vectors = extract_features.shape[1]
        attention_mask = self._get_feature_vector_attention_mask(
            num_feature_vectors, attention_mask, add_adapter=False
        )

    hidden_states, extract_features = self.feature_projection(extract_features)

    # Masking is applied to a clone rather than to the projection output itself
    # (this is the one deviation from the upstream implementation).
    projected_copy = hidden_states.clone()
    hidden_states = self._mask_hidden_states(
        projected_copy, mask_time_indices=mask_time_indices, attention_mask=attention_mask
    )

    encoded = self.encoder(
        hidden_states,
        attention_mask=attention_mask,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )
    hidden_states = encoded[0]

    if self.adapter is not None:
        hidden_states = self.adapter(hidden_states)

    if return_dict:
        return Wav2Vec2BaseModelOutput(
            last_hidden_state=hidden_states,
            extract_features=extract_features,
            hidden_states=encoded.hidden_states,
            attentions=encoded.attentions,
        )

    return (hidden_states, extract_features) + encoded[1:]


def gaudi_wav2vec2_tdnnlayer_forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
"""
Copied from Transformers: https://github.com/huggingface/transformers/blob/v4.37.2/src/transformers/models/wav2vec2/modeling_wav2vec2.py#L2290
Expand Down
11 changes: 5 additions & 6 deletions tests/baselines/llama_7b.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@
"multi_card": {
"learning_rate": 3e-4,
"train_batch_size": 8,
"perplexity": 2.4259,
"train_runtime": 199.94,
"train_samples_per_second": 88.664,
"perplexity": 2.4502,
"train_runtime": 210.305,
"train_samples_per_second": 85.0801,
"extra_arguments": [
"--bf16 True",
"--gradient_accumulation_steps 2",
Expand All @@ -89,7 +89,7 @@
"--adam_epsilon 1e-08",
"--ddp_bucket_cap_mb 50",
"--validation_split_percentage 10",
"--attn_softmax_bf16 True",
"--attn_softmax_bf16",
"--pipelining_fwd_bwd False",
"--fsdp auto_wrap",
"--torch_compile_backend hpu_backend",
Expand All @@ -100,5 +100,4 @@
}
}
}
}

}
59 changes: 31 additions & 28 deletions tests/test_fsdp_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,34 +10,37 @@
from .test_examples import ACCURACY_PERF_FACTOR, TIME_PERF_FACTOR


# Gaudi2 CI baselines
# FSDP is not supported on Gaudi1

# BERT base question-answering case, run with the "full_shard" FSDP setting.
_BERT_BASE_QA_FULL_SHARD = (
    "bert-base-uncased",
    "Habana/bert-base-uncased",
    2807,
    85.4688,
    "question-answering",
    24,
    8,
    "run_qa.py",
    "full_shard",
)

# Llama-2 7B language-modeling (LoRA script) case, run with the "auto_wrap" FSDP setting.
_LLAMA2_7B_LORA_AUTO_WRAP = (
    "meta-llama/Llama-2-7b-hf",
    "",
    54,
    0.92,
    "language-modeling",
    8,
    8,
    "run_lora_clm.py",
    "auto_wrap",
)

# Test cases keyed by precision mode; each case is a 9-field tuple.
MODELS_TO_TEST = {"bf16": [_BERT_BASE_QA_FULL_SHARD, _LLAMA2_7B_LORA_AUTO_WRAP]}
# Gaudi2 CI baselines: these cases are only selected when GAUDI2_CI=1.
_GAUDI2_FSDP_BF16_CASES = [
    (
        "bert-base-uncased",
        "Habana/bert-base-uncased",
        2807,
        85.4688,
        "question-answering",
        24,
        8,
        "run_qa.py",
        "full_shard",
    ),
    (
        "meta-llama/Llama-2-7b-hf",
        "",
        54,
        0.92,
        "language-modeling",
        8,
        8,
        "run_lora_clm.py",
        "auto_wrap",
    ),
]

# FSDP is not supported on Gaudi1, so the case list is empty outside Gaudi2 CI.
MODELS_TO_TEST = {
    "bf16": _GAUDI2_FSDP_BF16_CASES if os.environ.get("GAUDI2_CI", "0") == "1" else [],
}


def _test_fsdp(
Expand Down
1 change: 1 addition & 0 deletions tests/test_text_generation_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
("mistralai/Mistral-7B-v0.1", 40.00435417311187),
("microsoft/phi-2", 90.10751623430603),
],
"fp8": [],
"deepspeed": [
("bigscience/bloomz-7b1", 31.044523676681507),
],
Expand Down