Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions keras_nlp/layers/masked_lm_mask_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,7 @@ def call(self, inputs):
# convert dense to ragged.
inputs = tf.RaggedTensor.from_tensor(inputs)

(
token_ids,
mask_positions,
mask_ids,
) = tf_text.mask_language_model(
(token_ids, mask_positions, mask_ids,) = tf_text.mask_language_model(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you update your install of black (pip install -U black), you should get the latest formatting here.

inputs,
item_selector=self._random_selector,
mask_values_chooser=self._mask_values_chooser,
Expand Down
52 changes: 36 additions & 16 deletions keras_nlp/models/albert/albert_presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,15 @@
"num_segments": 2,
},
"preprocessor_config": {},
"description": (
"Base size of ALBERT where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"metadata": {
"description": (
"Base size of ALBERT where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"params": 11683584,
"official_name": "ALBERT",
"path": "albert",
},
"weights_url": "https://storage.googleapis.com/keras-nlp/models/albert_base_en_uncased/v1/model.h5",
"weights_hash": "b83ccf3418dd84adc569324183176813",
"spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/albert_base_en_uncased/v1/vocab.spm",
Expand All @@ -54,10 +59,15 @@
"num_segments": 2,
},
"preprocessor_config": {},
"description": (
"Large size of ALBERT where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"metadata": {
"description": (
"Large size of ALBERT where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"params": 17683968,
"official_name": "ALBERT",
"path": "albert",
},
"weights_url": "https://storage.googleapis.com/keras-nlp/models/albert_large_en_uncased/v1/model.h5",
"weights_hash": "c7754804efb245f06dd6e7ced32e082c",
"spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/albert_large_en_uncased/v1/vocab.spm",
Expand All @@ -78,10 +88,15 @@
"num_segments": 2,
},
"preprocessor_config": {},
"description": (
"Extra Large size of ALBERT where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"metadata": {
"description": (
"Extra Large size of ALBERT where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"params": 58724864,
"official_name": "ALBERT",
"path": "albert",
},
"weights_url": "https://storage.googleapis.com/keras-nlp/models/albert_extra_large_en_uncased/v1/model.h5",
"weights_hash": "713209be8aadfa614fd79f18c9aeb16d",
"spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/albert_extra_large_en_uncased/v1/vocab.spm",
Expand All @@ -102,10 +117,15 @@
"num_segments": 2,
},
"preprocessor_config": {},
"description": (
"Extra Large size of ALBERT where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"metadata": {
"description": (
"Extra Large size of ALBERT where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"params": 222595584,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can actually add "official_name" and "path" for all models, even ones that are not yet documented on keras.io.

The path should always match the directory that contains the model in KerasNLP. The official_name should match the model's name as given in the original paper.

"official_name": "ALBERT",
"path": "albert",
},
"weights_url": "https://storage.googleapis.com/keras-nlp/models/albert_extra_extra_large_en_uncased/v1/model.h5",
"weights_hash": "a835177b692fb6a82139f94c66db2f22",
"spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/albert_extra_extra_large_en_uncased/v1/vocab.spm",
Expand Down
30 changes: 20 additions & 10 deletions keras_nlp/models/bart/bart_presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,17 @@
"max_sequence_length": 1024,
},
"preprocessor_config": {},
"description": (
"Base size of BART where case is maintained. "
"Trained on a 160GB English dataset comprising BookCorpus, "
"English Wikipedia and CommonCrawl."
),

"metadata": {
"description": (
"Base size of BART where case is maintained. "
"Trained on a 160GB English dataset comprising BookCorpus, "
"English Wikipedia and CommonCrawl."
),
"params": 139417344,
"official_name": "BART",
"path": "bart",
},
"weights_url": "https://storage.googleapis.com/keras-nlp/models/bart_base_en/v1/model.h5",
"weights_hash": "5b59403f0cafafbd89680e0785791163",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bart_base_en/v1/vocab.json",
Expand All @@ -48,11 +54,15 @@
"max_sequence_length": 1024,
},
"preprocessor_config": {},
"description": (
"Large size of BART where case is maintained. "
"Trained on a 160GB English dataset comprising BookCorpus, "
"English Wikipedia and CommonCrawl."
),
"metadata": {
"description": (
"Base size of BART where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"params": 406287360,
"official_name": "BART",
"path": "bart",
},
"weights_url": "https://storage.googleapis.com/keras-nlp/models/bart_large_en/v1/model.h5",
"weights_hash": "6bfe7e591af8c5699ce6f9f18753af9a",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bart_large_en/v1/vocab.json",
Expand Down
121 changes: 85 additions & 36 deletions keras_nlp/models/bert/bert_presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,15 @@
"preprocessor_config": {
"lowercase": True,
},
"description": (
"Tiny size of BERT where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"metadata": {
"description": (
"2-layer BERT model where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"params": 4386178,
"official_name": "BERT",
"path": "bert",
},
"weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_tiny_en_uncased/v1/model.h5",
"weights_hash": "c2b29fcbf8f814a0812e4ab89ef5c068",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_tiny_en_uncased/v1/vocab.txt",
Expand All @@ -53,10 +58,15 @@
"preprocessor_config": {
"lowercase": True,
},
"description": (
"Small size of BERT where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"metadata": {
"description": (
"4-layer BERT model where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"params": 28764674,
"official_name": "BERT",
"path": "bert",
},
"weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_small_en_uncased/v1/model.h5",
"weights_hash": "08632c9479b034f342ba2c2b7afba5f7",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_small_en_uncased/v1/vocab.txt",
Expand All @@ -76,10 +86,15 @@
"preprocessor_config": {
"lowercase": True,
},
"description": (
"Medium size of BERT where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"metadata": {
"description": (
"8-layer BERT model where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"params": 41374210,
"official_name": "BERT",
"path": "bert",
},
"weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_medium_en_uncased/v1/model.h5",
"weights_hash": "bb990e1184ec6b6185450c73833cd661",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_medium_en_uncased/v1/vocab.txt",
Expand All @@ -99,10 +114,15 @@
"preprocessor_config": {
"lowercase": True,
},
"description": (
"Base size of BERT where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"metadata": {
"description": (
"12-layer BERT model where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"params": 109483778,
"official_name": "BERT",
"path": "bert",
},
"weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_en_uncased/v1/model.h5",
"weights_hash": "9b2b2139f221988759ac9cdd17050b31",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_en_uncased/v1/vocab.txt",
Expand All @@ -122,10 +142,15 @@
"preprocessor_config": {
"lowercase": False,
},
"description": (
"Base size of BERT where case is maintained. "
"Trained on English Wikipedia + BooksCorpus."
),
"metadata": {
"description": (
"12-layer BERT model where case is maintained. "
"Trained on English Wikipedia + BooksCorpus."
),
"params": 109483778,
"official_name": "BERT",
"path": "bert",
},
"weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_en/v1/model.h5",
"weights_hash": "f94a6cb012e18f4fb8ec92abb91864e9",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_en/v1/vocab.txt",
Expand All @@ -145,7 +170,14 @@
"preprocessor_config": {
"lowercase": False,
},
"description": ("Base size of BERT. Trained on Chinese Wikipedia."),
"metadata": {
"description": (
"12-layer BERT model. Trained on Chinese Wikipedia."
),
"params": 102269186,
"official_name": "BERT",
"path": "bert",
},
"weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_zh/v1/model.h5",
"weights_hash": "79afa421e386076e62ab42dad555ab0c",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_zh/v1/vocab.txt",
Expand All @@ -165,10 +197,14 @@
"preprocessor_config": {
"lowercase": False,
},
"description": (
"Base size of BERT. Trained on trained on Wikipedias of 104 "
"languages."
),
"metadata": {
"description": (
"12-layer BERT model where case is maintained. Trained on trained on Wikipedias of 104 languages"
),
"params": 177854978,
"official_name": "BERT",
"path": "bert",
},
"weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_multi/v1/model.h5",
"weights_hash": "b0631cec0a1f2513c6cfd75ba29c33aa",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_multi/v1/vocab.txt",
Expand All @@ -188,10 +224,13 @@
"preprocessor_config": {
"lowercase": True,
},
"description": (
"Large size of BERT where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"metadata": {
"description": (
"24-layer BERT model where all input is lowercased. "
"Trained on English Wikipedia + BooksCorpus."
),
"params": 335143938,
},
"weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_large_en_uncased/v1/model.h5",
"weights_hash": "cc5cacc9565ef400ee4376105f40ddae",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_large_en_uncased/v1/vocab.txt",
Expand All @@ -211,10 +250,15 @@
"preprocessor_config": {
"lowercase": False,
},
"description": (
"Base size of BERT where case is maintained. "
"Trained on English Wikipedia + BooksCorpus."
),
"metadata": {
"description": (
"24-layer BERT model where case is maintained. "
"Trained on English Wikipedia + BooksCorpus."
),
"params": 333581314,
"official_name": "BERT",
"path": "bert",
},
"weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_large_en/v1/model.h5",
"weights_hash": "8b8ab82290bbf4f8db87d4f100648890",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_large_en/v1/vocab.txt",
Expand Down Expand Up @@ -244,9 +288,14 @@
"preprocessor_config": {
"lowercase": True,
},
"description": (
"bert_tiny_en_uncased backbone fine-tuned on the glue/sst2 dataset."
),
"metadata": {
"description": (
"The bert_tiny_en_uncased backbone model fine-tuned on the SST-2 sentiment analysis dataset."
),
"params": 4386178,
"official_name": "BERT",
"path": "bert",
},
"weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_tiny_en_uncased_sst2/v1/model.h5",
"weights_hash": "1f9c2d59f9e229e08f3fbd44239cfb0b",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_tiny_en_uncased_sst2/v1/vocab.txt",
Expand Down
Loading