Commit d7df38c
Merge branch 'master' into electra
2 parents b268e26 + 45d8bd3

36 files changed: +271 -76 lines

STYLE_GUIDE.md
Lines changed: 1 addition & 1 deletion

@@ -116,7 +116,7 @@ class PositionEmbedding(keras.layers.Layer):
     Args:
         sequence_length: The maximum length of the dynamic sequence.
 
-    Examples:
+    Example:
 
     Direct call.
     >>> layer = keras_nlp.layers.PositionEmbedding(sequence_length=10)
keras_nlp/layers/modeling/alibi_bias.py
Lines changed: 4 additions & 1 deletion

@@ -35,12 +35,15 @@ class AlibiBias(keras.layers.Layer):
             each head. The heads' slopes are a geometric sequence that starts at
             `2**(-alibi_bias_max/num_heads)` and uses that same value as its
             ratio. Defaults to 8.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `trainable`, `dtype` etc.
+
     Call arguments:
         attention_scores: The result of multipying the query and the key of the
             multi-head attention layer of the transformer to add alibi bias to
             it. With shape `(batch_size, num_heads, query_length, key_length)`.
 
-    Examples:
+    Example:
     ```python
     query_length = 10
     key_length = 10
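The hunk above documents that `name`, `trainable`, and `dtype` now reach `keras.layers.Layer` through `**kwargs`. A minimal sketch of the documented call signature, assuming `keras_nlp` is installed (the shapes and the `name` value are illustrative, not from the commit):

```python
import numpy as np
import keras_nlp

# Illustrative shapes from the docstring: (batch, heads, query_len, key_len).
batch_size, num_heads, query_length, key_length = 1, 8, 10, 10
attention_scores = np.random.rand(
    batch_size, num_heads, query_length, key_length
).astype("float32")

# `name` is forwarded to `keras.layers.Layer` via the documented **kwargs.
alibi = keras_nlp.layers.AlibiBias(alibi_bias_max=8, name="alibi_bias")
biased_scores = alibi(attention_scores)  # same shape, per-head linear bias added
```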

keras_nlp/layers/modeling/f_net_encoder.py
Lines changed: 4 additions & 5 deletions

@@ -47,10 +47,10 @@ class FNetEncoder(keras.layers.Layer):
         bias_initializer: "string" or `keras.initializers` initializer.
             The bias initializer for the dense layers.
             Defaults to `"zeros"`.
-        name: string. The name of the layer. Defaults to `None`.
-        **kwargs: other keyword arguments.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `trainable`, `dtype` etc.
 
-    Examples:
+    Example:
 
     ```python
     # Create a single FNet encoder layer.
@@ -79,10 +79,9 @@ def __init__(
         layer_norm_epsilon=1e-5,
         kernel_initializer="glorot_uniform",
         bias_initializer="zeros",
-        name=None,
         **kwargs
     ):
-        super().__init__(name=name, **kwargs)
+        super().__init__(**kwargs)
         self.intermediate_dim = intermediate_dim
         self.dropout = dropout
         self.activation = keras.activations.get(activation)
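Dropping `name=None` from the signature works because `name` now rides along in `**kwargs` to `super().__init__`. A sketch of the unchanged user-facing behavior, completing the truncated `# Create a single FNet encoder layer.` example (dimensions are illustrative):

```python
import keras
import keras_nlp

# Create a single FNet encoder layer; `name` is no longer an explicit
# parameter, it passes through **kwargs to `keras.layers.Layer`.
encoder = keras_nlp.layers.FNetEncoder(intermediate_dim=64, name="f_net_encoder")

inputs = keras.Input(shape=(10, 64))
outputs = encoder(inputs)
model = keras.Model(inputs=inputs, outputs=outputs)
```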

keras_nlp/layers/modeling/masked_lm_head.py
Lines changed: 3 additions & 1 deletion

@@ -59,8 +59,10 @@ class MaskedLMHead(keras.layers.Layer):
         bias_initializer: string or `keras.initializers` initializer.
             The bias initializer for the dense and multiheaded
             attention layers. Defaults to `"zeros"`.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `trainable`, `dtype` etc.
 
-    Examples:
+    Example:
 
     ```python
     batch_size = 16
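Completing the truncated `batch_size = 16` example, a hedged sketch of a full forward call (the mask count, tensors, and layer name are assumptions, not part of the commit):

```python
import numpy as np
import keras_nlp

batch_size, seq_length, hidden_dim, vocab_size = 16, 50, 32, 100

# Fake encoder output and the positions of five masked tokens per sample.
encoded_tokens = np.random.normal(
    size=(batch_size, seq_length, hidden_dim)
).astype("float32")
mask_positions = np.random.randint(seq_length, size=(batch_size, 5))

# `name` reaches `keras.layers.Layer` through the newly documented **kwargs.
lm_head = keras_nlp.layers.MaskedLMHead(
    vocabulary_size=vocab_size, activation="softmax", name="mlm_head"
)
preds = lm_head(encoded_tokens, mask_positions)  # (16, 5, 100)
```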

keras_nlp/layers/modeling/position_embedding.py
Lines changed: 3 additions & 1 deletion

@@ -33,6 +33,8 @@ class PositionEmbedding(keras.layers.Layer):
         initializer: The initializer to use for the embedding weights. Defaults
             to `"glorot_uniform"`.
         seq_axis: The axis of the input tensor where we add the embeddings.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `trainable`, `dtype` etc.
 
     Call arguments:
         inputs: The tensor inputs to compute an embedding for, with shape
@@ -43,7 +45,7 @@ class PositionEmbedding(keras.layers.Layer):
         compute the position embedding from. This is useful during cached
         decoding, where each position is predicted separately in a loop.
 
-    Examples:
+    Example:
 
     Called directly on input.
     >>> layer = keras_nlp.layers.PositionEmbedding(sequence_length=10)
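The doctest above is truncated by the hunk; a sketch of the "called directly on input" pattern it describes (feature size and layer name are illustrative):

```python
import numpy as np
import keras_nlp

# Token embeddings with shape (batch, sequence_length, feature_dim).
token_embeddings = np.random.rand(1, 10, 16).astype("float32")

layer = keras_nlp.layers.PositionEmbedding(
    sequence_length=10,
    name="position_embedding",  # accepted via the newly documented **kwargs
)
outputs = token_embeddings + layer(token_embeddings)  # (1, 10, 16)
```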

keras_nlp/layers/modeling/reversible_embedding.py
Lines changed: 3 additions & 1 deletion

@@ -52,14 +52,16 @@ class ReversibleEmbedding(keras.layers.Embedding):
         reverse_dtype: The dtype for the reverse projection computation.
             For stability, it is usually best to use full precision even when
             working with half or mixed precision training.
+        **kwargs: other keyword arguments passed to `keras.layers.Embedding`,
+            including `name`, `trainable`, `dtype` etc.
 
     Call arguments:
         inputs: The tensor inputs to the layer.
         reverse: Boolean. If `True` the layer will perform a linear projection
             from `output_dim` to `input_dim`, instead of a normal embedding
             call. Default to `False`.
 
-    Examples:
+    Example:
     ```python
     batch_size = 16
     vocab_size = 100
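The `reverse=True` call argument documented above is what makes this `keras.layers.Embedding` subclass reversible. A sketch of both directions, continuing the docstring's `batch_size = 16` / `vocab_size = 100` setup (`hidden_dim` and the layer name are assumptions):

```python
import numpy as np
import keras_nlp

batch_size, seq_length, vocab_size, hidden_dim = 16, 50, 100, 32
token_ids = np.random.randint(vocab_size, size=(batch_size, seq_length))

embedding = keras_nlp.layers.ReversibleEmbedding(
    input_dim=vocab_size,
    output_dim=hidden_dim,
    name="reversible_embedding",  # forwarded to keras.layers.Embedding via **kwargs
)
hidden_states = embedding(token_ids)             # (16, 50, 32) embedding lookup
logits = embedding(hidden_states, reverse=True)  # (16, 50, 100) projection back
```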

keras_nlp/layers/modeling/rotary_embedding.py
Lines changed: 2 additions & 0 deletions

@@ -38,6 +38,8 @@ class RotaryEmbedding(keras.layers.Layer):
         scaling_factor: float. The scaling factor used to scale frequency range.
         sequence_axis: int. Sequence axis in the input tensor.
         feature_axis: int. Feature axis in the input tensor.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `trainable`, `dtype` etc.
 
     Call arguments:
         inputs: The tensor inputs to apply the embedding to. This can have
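A hedged sketch of the documented axes in use, assuming a query tensor laid out as `(batch, sequence, heads, head_dim)` (shapes and the layer name are illustrative):

```python
import numpy as np
import keras_nlp

# Query tensor shaped (batch, sequence, heads, head_dim).
query = np.random.rand(2, 10, 8, 64).astype("float32")

rope = keras_nlp.layers.RotaryEmbedding(
    sequence_axis=1,
    feature_axis=-1,
    name="rotary_embedding",  # passed through the newly documented **kwargs
)
rotated_query = rope(query)  # same shape, rotations applied along the feature axis
```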

keras_nlp/layers/modeling/sine_position_encoding.py
Lines changed: 3 additions & 1 deletion

@@ -34,6 +34,8 @@ class SinePositionEncoding(keras.layers.Layer):
         max_wavelength: The maximum angular wavelength of the sine/cosine
             curves, as described in Attention is All You Need. Defaults to
             `10000`.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `trainable`, `dtype` etc.
 
     Call arguments:
         inputs: The tensor inputs to compute an embedding for, with shape
@@ -42,7 +44,7 @@ class SinePositionEncoding(keras.layers.Layer):
         compute the encoding from. This is useful during cached decoding,
         where each position is predicted separately in a loop.
 
-    Examples:
+    Example:
     ```python
     # create a simple embedding layer with sinusoidal positional encoding
     seq_len = 100
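Completing the truncated `seq_len = 100` snippet, a sketch of the encoding added onto a token embedding (the surrounding model wiring is an assumption based on the docstring fragment):

```python
import keras
import keras_nlp

# create a simple embedding layer with sinusoidal positional encoding
seq_len, vocab_size, embedding_dim = 100, 1000, 32

inputs = keras.Input((seq_len,), dtype="int32")
embedding = keras.layers.Embedding(vocab_size, embedding_dim)(inputs)
positional_encoding = keras_nlp.layers.SinePositionEncoding(
    max_wavelength=10000, name="sine_position_encoding"
)(embedding)
outputs = embedding + positional_encoding
```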

keras_nlp/layers/modeling/token_and_position_embedding.py
Lines changed: 6 additions & 1 deletion

@@ -33,6 +33,9 @@ class TokenAndPositionEmbedding(keras.layers.Layer):
         vocabulary_size: The size of the vocabulary.
         sequence_length: The maximum length of input sequence
         embedding_dim: The output dimension of the embedding layer
+        tie_weights: Boolean, whether or not the matrix for embedding and
+            the matrix for the `reverse` projection should share the same
+            weights.
         embeddings_initializer: The initializer to use for the Embedding
             Layers
         mask_zero: Boolean, whether or not the input value 0 is a special
@@ -43,8 +46,10 @@ class TokenAndPositionEmbedding(keras.layers.Layer):
         If mask_zero` is set to True, as a consequence, index 0 cannot be
         used in the vocabulary
         (input_dim should equal size of vocabulary + 1).
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `trainable`, `dtype` etc.
 
-    Examples:
+    Example:
     ```python
     inputs = np.ones(shape=(1, 50), dtype="int32")
     embedding_layer = keras_nlp.layers.TokenAndPositionEmbedding(
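Completing the truncated constructor call with the newly documented `tie_weights` flag (the sizes and layer name are illustrative):

```python
import numpy as np
import keras_nlp

inputs = np.ones(shape=(1, 50), dtype="int32")
embedding_layer = keras_nlp.layers.TokenAndPositionEmbedding(
    vocabulary_size=10000,
    sequence_length=50,
    embedding_dim=64,
    # Newly documented: share the embedding matrix with the `reverse` projection.
    tie_weights=True,
    name="token_and_position_embedding",  # via **kwargs
)
outputs = embedding_layer(inputs)  # (1, 50, 64)
```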

keras_nlp/layers/modeling/transformer_decoder.py
Lines changed: 3 additions & 3 deletions

@@ -69,10 +69,10 @@ class TransformerDecoder(keras.layers.Layer):
             (similar to GPT-2). If set to False, outputs of attention layer and
             intermediate dense layer are normalized (similar to BERT).
             Defaults to `False`.
-        name: string. The name of the layer. Defaults to `None`.
-        **kwargs: other keyword arguments.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `trainable`, `dtype` etc.
 
-    Examples:
+    Example:
     ```python
     # Create a single transformer decoder layer.
     decoder = keras_nlp.layers.TransformerDecoder(
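Completing the truncated constructor, a sketch with both self- and cross-attention inputs (dimensions are illustrative; `intermediate_dim` and `num_heads` are the required arguments):

```python
import numpy as np
import keras_nlp

# Create a single transformer decoder layer; `name` now flows through
# **kwargs, as in the FNetEncoder change above.
decoder = keras_nlp.layers.TransformerDecoder(
    intermediate_dim=64, num_heads=8, name="transformer_decoder"
)

decoder_input = np.random.rand(2, 10, 64).astype("float32")
encoder_input = np.random.rand(2, 10, 64).astype("float32")
outputs = decoder(decoder_input, encoder_input)  # (2, 10, 64)
```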
