Merged

34 commits
ae09d5b
fix tolerance for a bloom slow test
younesbelkada Jun 9, 2022
a3be071
enhance alibi padding
younesbelkada Jun 17, 2022
fcfe5b7
optimize attention mask
younesbelkada Jul 6, 2022
23d8eb3
fix scaled softmax limit values
NouamaneTazi Jul 7, 2022
6ac1206
Merge branch 'main' of https://github.com/huggingface/transformers in…
NouamaneTazi Jul 7, 2022
287d3c7
optimize building alibi tensor
NouamaneTazi Jul 7, 2022
342dad1
fix attention_mask shape when it's None
NouamaneTazi Jul 7, 2022
563c323
minor fixes
younesbelkada Jul 7, 2022
082a1b8
remove colons in docstring
younesbelkada Jul 7, 2022
af8dce9
Apply suggestions from code review
younesbelkada Jul 7, 2022
2dc5134
apply suggestion
younesbelkada Jul 7, 2022
9fbfc36
remove unsued arg
younesbelkada Jul 7, 2022
7ed70e4
refactor a bit
younesbelkada Jul 7, 2022
6b8fb39
refactor attention block
younesbelkada Jul 8, 2022
68a4d39
quick fixes
NouamaneTazi Jul 8, 2022
53bef9b
first attempt
younesbelkada Jul 8, 2022
773d8e7
refactor attention block and fix all tests except "test_simple_genera…
NouamaneTazi Jul 9, 2022
ed93b22
remove debug lines and add TODO comment
NouamaneTazi Jul 10, 2022
91dfee5
change `torch.bmm` to `torch.baddbmm`
NouamaneTazi Jul 10, 2022
2272cb0
styling
NouamaneTazi Jul 10, 2022
abaf17f
Merge branch 'main' of https://github.com/huggingface/transformers in…
NouamaneTazi Jul 10, 2022
eb86c43
all tests are passing now
NouamaneTazi Jul 11, 2022
3ba1bd2
styling
NouamaneTazi Jul 11, 2022
18cc4d4
fix support for accelerate
NouamaneTazi Jul 11, 2022
1f95e29
Apply suggestions from code review
younesbelkada Jul 11, 2022
34af267
remove attn softmax in fp32
younesbelkada Jul 11, 2022
fa40cc8
Merge branch 'bloom-enhance-alibi' of https://github.com/younesbelkad…
younesbelkada Jul 11, 2022
adb67d6
refactor comments
younesbelkada Jul 11, 2022
ed9682d
refactor a bit
younesbelkada Jul 11, 2022
3ef948c
refer to pytorch t5
younesbelkada Jul 11, 2022
992e318
change the slow tests
younesbelkada Jul 11, 2022
0b3db53
update expected output for `test_simple_generation`
NouamaneTazi Jul 11, 2022
81bf622
make style + change comments a bit
younesbelkada Jul 11, 2022
bf31148
fix dtype padd test
younesbelkada Jul 11, 2022
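Several of the commits above ("enhance alibi padding", "optimize building alibi tensor", "change `torch.bmm` to `torch.baddbmm`") revolve around how the ALiBi bias is built from the attention mask and folded into the attention-score matmul. The sketch below is not the PR's code; it is a minimal illustration under simplifying assumptions (power-of-two head count, invented shapes, and a hypothetical `build_alibi_bias` helper) of why `torch.baddbmm` helps: it computes `beta * input + alpha * (batch1 @ batch2)` in a single call, so the bias add and the 1/sqrt(head_dim) scaling no longer need separate ops.

import torch

def build_alibi_bias(attention_mask: torch.Tensor, num_heads: int) -> torch.Tensor:
    """Illustrative ALiBi bias of shape (batch * num_heads, 1, seq_len).

    Assumes num_heads is a power of two; a full implementation also covers the
    general case by interleaving extra slopes for the remaining heads.
    """
    # Geometric sequence of per-head slopes: 2^(-8/num_heads), 2^(-16/num_heads), ...
    slopes = torch.tensor([2.0 ** (-8.0 * (i + 1) / num_heads) for i in range(num_heads)])
    # Positions derived from the padding mask so that padded tokens contribute 0.
    positions = ((attention_mask.cumsum(dim=-1) - 1) * attention_mask).float()
    # (1, num_heads, 1) * (batch, 1, seq_len) -> (batch, num_heads, seq_len)
    alibi = slopes[None, :, None] * positions[:, None, :]
    batch_size, _, seq_len = alibi.shape
    return alibi.reshape(batch_size * num_heads, 1, seq_len)

batch_size, num_heads, seq_len, head_dim = 2, 8, 5, 64
attention_mask = torch.ones(batch_size, seq_len, dtype=torch.long)

query = torch.randn(batch_size * num_heads, seq_len, head_dim)
key = torch.randn(batch_size * num_heads, seq_len, head_dim)
alibi = build_alibi_bias(attention_mask, num_heads)
inv_norm_factor = 1.0 / (head_dim ** 0.5)

# Two-step version: batched matmul, then scale and add the bias.
scores_bmm = torch.bmm(query, key.transpose(1, 2)) * inv_norm_factor + alibi

# Fused version: baddbmm computes beta * input + alpha * (batch1 @ batch2),
# folding the bias add and the 1/sqrt(head_dim) scaling into one call.
scores_baddbmm = torch.baddbmm(
    alibi, query, key.transpose(1, 2), beta=1.0, alpha=inv_norm_factor
)

assert torch.allclose(scores_bmm, scores_baddbmm, atol=1e-4)

Because softmax is invariant to a constant shift within each row, a bias that depends only on the key position behaves like the relative-distance penalty described in the ALiBi paper, which is why a (batch * num_heads, 1, seq_len) tensor is enough here.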
7 changes: 0 additions & 7 deletions src/transformers/models/bloom/configuration_bloom.py
@@ -72,9 +72,6 @@ class BloomConfig(PretrainedConfig):
             If set to `True`, it will skip bias add for each linear layer in the transformer blocks
         skip_bias_add_qkv (`bool`, *optional*, defaults to `False`):
             If set to `True`, it will skip bias add for the first linear layer in the transformer blocks
-        attention_softmax_in_fp32 (`bool`, *optional*, defaults to `True`):
-            If set to `True` and the `dtype` is set to `float16` it will scale the input of the Softmax function to
-            `fp32`
         hidden_dropout (`float`, *optional*, defaults to 0.1):
             Dropout rate of the dropout function on the bias dropout.
         attention_dropout (`float`, *optional*, defaults to 0.1):
@@ -128,7 +125,6 @@ def __init__(
         hidden_size=64,
         n_layer=2,
         n_head=8,
-        masked_softmax_fusion=True,
         layer_norm_epsilon=1e-5,
         initializer_range=0.02,
         use_cache=False,
@@ -137,7 +133,6 @@
         apply_residual_connection_post_layernorm=False,
         hidden_dropout=0.0,
         attention_dropout=0.0,
-        attention_softmax_in_fp32=True,
         pretraining_tp=1,  # TP rank used when training with megatron
         dtype="bfloat16",
         slow_but_exact=False,
@@ -147,15 +142,13 @@
         self.hidden_size = hidden_size
         self.n_layer = n_layer
         self.n_head = n_head
-        self.masked_softmax_fusion = masked_softmax_fusion
         self.layer_norm_epsilon = layer_norm_epsilon
         self.initializer_range = initializer_range
         self.use_cache = use_cache
         self.pretraining_tp = pretraining_tp
         self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
         self.hidden_dropout = hidden_dropout
         self.attention_dropout = attention_dropout
-        self.attention_softmax_in_fp32 = attention_softmax_in_fp32

         self.bos_token_id = bos_token_id
         self.eos_token_id = eos_token_id
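The diff above drops `masked_softmax_fusion` and `attention_softmax_in_fp32` from `BloomConfig`, so neither switch is a configuration field any more. As a hedged sketch (the values simply mirror the defaults visible in the diff, not a recommended configuration), the config can still be built from the surviving arguments alone:

from transformers import BloomConfig

# Only arguments that remain after this PR; values mirror the defaults shown in the diff.
config = BloomConfig(
    hidden_size=64,
    n_layer=2,
    n_head=8,
    layer_norm_epsilon=1e-5,
    initializer_range=0.02,
    use_cache=False,
    apply_residual_connection_post_layernorm=False,
    hidden_dropout=0.0,
    attention_dropout=0.0,
    pretraining_tp=1,  # TP rank used when training with megatron
    slow_but_exact=False,
)

print(config.n_head, config.attention_dropout)

Passing the removed keywords would no longer configure the attention softmax; like any unknown keyword, they would simply fall through to `PretrainedConfig`'s generic keyword handling.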