Merged. Changes from 14 commits.
2 changes: 1 addition & 1 deletion keras_nlp/models/albert/albert_tokenizer.py
@@ -59,7 +59,7 @@ class AlbertTokenizer(SentencePieceTokenizer):
tokenizer("the quick brown fox")

# Detokenization.
-tokenizer.detokenize(tf.constant([[[2, 14, 2231, 886, 2385, 3]]))
+tokenizer.detokenize(tf.constant([[2, 14, 2231, 886, 2385, 3]]))
```
"""

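Side note on this fix: the old snippet was not even valid Python (three opening brackets, two closing), which the doctest run caught; the corrected call passes a rank-2 batch of token ids. A minimal sketch of just the tensor construction (the tokenizer itself needs a SentencePiece model, omitted here):

```python
import tensorflow as tf

# One sequence of six token ids, shaped (batch, sequence) = (1, 6),
# which is what detokenize expects for batched input.
ids = tf.constant([[2, 14, 2231, 886, 2385, 3]])
print(ids.shape)  # (1, 6)
```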
7 changes: 5 additions & 2 deletions keras_nlp/models/backbone.py
@@ -49,12 +49,15 @@ def from_preset(
Defaults to `True`.

Examples:
+doctest.skip
Review comment (Member): Let's remember to remove this.

```python
# Load architecture and weights from preset
-model = {{model_name}}.from_preset("{{example_preset_name}}")
+model = keras_nlp.models.{{model_name}}.from_preset(
+    "{{example_preset_name}}"
+)

# Load randomly initialized model from preset architecture
-model = {{model_name}}.from_preset(
+model = keras_nlp.models.{{model_name}}.from_preset(
"{{example_preset_name}}",
load_weights=False
)
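Since `{{model_name}}` and `{{example_preset_name}}` appear to be template fields filled in per subclass, here is a hypothetical rendered form for one concrete class (class and preset names borrowed from the bert_backbone.py hunk below, purely for illustration):

```python
import keras_nlp

# Load architecture and weights from preset.
model = keras_nlp.models.BertBackbone.from_preset("bert_base_en_uncased")

# Load randomly initialized model from preset architecture.
model = keras_nlp.models.BertBackbone.from_preset(
    "bert_base_en_uncased",
    load_weights=False,
)
```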
2 changes: 1 addition & 1 deletion keras_nlp/models/bert/bert_backbone.py
@@ -77,7 +77,7 @@ class BertBackbone(Backbone):
}

# Pretrained BERT encoder
model = keras_nlp.models.BertBackbone.from_preset("base_base_en_uncased")
model = keras_nlp.models.BertBackbone.from_preset("bert_base_en_uncased")
Review comment (Member): lol, good we are running this :)

output = model(input_data)

# Randomly initialized BERT encoder with a custom config
7 changes: 5 additions & 2 deletions keras_nlp/models/distil_bert/distil_bert_classifier.py
@@ -64,7 +64,7 @@ class DistilBertClassifier(PipelineModel):
preprocessed_features = {
"token_ids": tf.ones(shape=(2, 12), dtype=tf.int64),
"padding_mask": tf.constant(
-        [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(1, 12)),
+        [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)),
}
labels = [0, 3]

@@ -84,6 +84,9 @@ class DistilBertClassifier(PipelineModel):
num_classes=4,
preprocessor=None,
)
+classifier.compile(
+    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+)
classifier.fit(x=preprocessed_features, y=labels, batch_size=2)

# Access backbone programmatically (e.g., to change `trainable`)
@@ -218,7 +221,7 @@ def from_preset(
labels = [0, 3]

# Use a shorter sequence length.
-preprocessor = keras_nlp.models.DistilBertBackbone.from_preset(
+preprocessor = keras_nlp.models.DistilBertPreprocessor.from_preset(
"distil_bert_base_en_uncased",
sequence_length=128,
)
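Two notes on this file's changes. The padding_mask reshape to (2, 12) aligns the mask with the token_ids batch, which is built as tf.ones(shape=(2, 12)). And the new compile(...) call is needed because the raw-features example calls fit directly, so it must set a loss first; from_logits=True suggests the classifier head emits raw logits. The same addition appears in the XLMRobertaClassifier example below. A minimal, self-contained sketch of what that loss setting means (values are illustrative):

```python
import tensorflow as tf
from tensorflow import keras

# from_logits=True tells the loss to apply softmax internally, so the
# model may output raw, unnormalized scores.
logits = tf.constant([[2.0, -1.0, 0.5, 0.1]])  # one example, 4 classes
labels = tf.constant([0])
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
print(float(loss(labels, logits)))  # equals -log(softmax(logits)[0])
```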
12 changes: 6 additions & 6 deletions keras_nlp/models/roberta/roberta_classifier.py
@@ -197,8 +197,8 @@ def from_preset(
features = ["The quick brown fox jumped.", "I forgot my homework."]
labels = [0, 3]

-# Create a RobertClassifier and fit your data.
-classifier = keras_nlp.models.RobertClassifier.from_preset(
+# Create a RobertaClassifier and fit your data.
+classifier = keras_nlp.models.RobertaClassifier.from_preset(
"roberta_base_en",
num_classes=4,
)
@@ -215,13 +215,13 @@ def from_preset(
labels = [0, 3]

# Use a shorter sequence length.
-preprocessor = keras_nlp.models.RobertPreprocessor.from_preset(
+preprocessor = keras_nlp.models.RobertaPreprocessor.from_preset(
"roberta_base_en",
sequence_length=128,
)

-# Create a RobertClassifier and fit your data.
-classifier = keras_nlp.models.RobertClassifier.from_preset(
+# Create a RobertaClassifier and fit your data.
+classifier = keras_nlp.models.RobertaClassifier.from_preset(
"roberta_base_en",
num_classes=4,
preprocessor=preprocessor,
@@ -244,7 +244,7 @@ def from_preset(
labels = [0, 3]

# Create a RoBERTa classifier and fit your data.
-classifier = keras_nlp.models.RobertClassifier.from_preset(
+classifier = keras_nlp.models.RobertaClassifier.from_preset(
"roberta_base_en",
num_classes=4,
preprocessor=None,
5 changes: 4 additions & 1 deletion keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py
@@ -62,7 +62,7 @@ class XLMRobertaClassifier(PipelineModel):
preprocessed_features = {
"token_ids": tf.ones(shape=(2, 12), dtype=tf.int64),
"padding_mask": tf.constant(
-        [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(1, 12)),
+        [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)),
}
labels = [0, 3]

@@ -82,6 +82,9 @@ class XLMRobertaClassifier(PipelineModel):
num_classes=4,
preprocessor=None,
)
+classifier.compile(
+    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+)
classifier.fit(x=preprocessed_features, y=labels, batch_size=2)

# Access backbone programmatically (e.g., to change `trainable`)
2 changes: 1 addition & 1 deletion keras_nlp/models/xlm_roberta/xlm_roberta_tokenizer.py
@@ -186,7 +186,7 @@ def from_preset(
tokenizer("The quick brown fox tripped.")

# Detokenize some input.
-tokenizer.detokenize([5, 6, 7, 8, 9])
+tokenizer.detokenize(tf.constant([581, 63773, 119455, 6, 147797]))
```
"""
if preset not in cls.presets:
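Worth noting: the placeholder ids [5, 6, 7, 8, 9] are replaced with ids that actually exist in the XLM-RoBERTa vocabulary, so the doctest now produces meaningful output. A hedged sketch of the round trip the example implies (the preset name is an assumption here, not taken from this diff):

```python
import keras_nlp

# Assumed preset name, for illustration only.
tokenizer = keras_nlp.models.XLMRobertaTokenizer.from_preset(
    "xlm_roberta_base_multi"
)
ids = tokenizer("The quick brown fox tripped.")
print(tokenizer.detokenize(ids))  # should recover the input text
```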
71 changes: 68 additions & 3 deletions keras_nlp/tests/doc_tests/docstring_test.py
@@ -13,19 +13,23 @@
# limitations under the License.

import doctest
import io
import os
import sys
import unittest

import numpy as np
import pytest
import sentencepiece
import tensorflow as tf
from tensorflow import keras

import keras_nlp
from keras_nlp.tests.doc_tests import docstring_lib
from keras_nlp.tests.doc_tests import fenced_docstring_lib

PACKAGE = "keras_nlp."
DIRECTORY = "keras_nlp"


def find_modules():
@@ -37,9 +41,6 @@ def find_modules():
return keras_nlp_modules


-@pytest.mark.skipif(
-    sys.platform == "win32", reason="Numpy prints differently on windows"
-)
def test_docstrings():
keras_nlp_modules = find_modules()
# As of this writing, it doesn't seem like pytest support load_tests
@@ -77,3 +78,67 @@ def test_docstrings():
if not result.wasSuccessful():
print(result)
assert result.wasSuccessful()


@pytest.mark.extra_large
def test_fenced_docstrings():
Review comment (Member): we should probably mark this a large test, this will involve a lot of file downloads for the preset right?
"""Tests fenced code blocks in docstrings.

This can only be run manually. Run with:
`pytest keras_nlp/tests/doc_tests/docstring_test.py --run_extra_large`
"""
keras_nlp_modules = find_modules()

runner = unittest.TextTestRunner()
suite = unittest.TestSuite()
for module in keras_nlp_modules:
# Temporarily stop testing gpt2 & deberta docstrings until we are
# exporting the symbols.
if "gpt2" in module.__name__ or "deberta_v3" in module.__name__:
continue
# Do not test certain modules.
if module.__name__ in [
# Base classes.
"keras_nlp.models.backbone",
"keras_nlp.models.preprocessor",
# Preprocessors and tokenizers which use `model.spm`.
"keras_nlp.models.albert.albert_preprocessor",
"keras_nlp.models.albert.albert_tokenizer",
"keras_nlp.models.xlm_roberta.xlm_roberta_preprocessor",
]:
continue

suite.addTest(
doctest.DocTestSuite(
module,
test_finder=doctest.DocTestFinder(
exclude_empty=False,
parser=fenced_docstring_lib.FencedCellParser(
fence_label="python"
),
),
globs={
"_print_if_not_none": fenced_docstring_lib._print_if_not_none
},
extraglobs={
"tf": tf,
"np": np,
"os": os,
"keras": keras,
"keras_nlp": keras_nlp,
"io": io,
"sentencepiece": sentencepiece,
},
checker=docstring_lib.DoctestOutputChecker(),
optionflags=(
doctest.ELLIPSIS
| doctest.NORMALIZE_WHITESPACE
| doctest.IGNORE_EXCEPTION_DETAIL
| doctest.DONT_ACCEPT_BLANKLINE
),
)
)
result = runner.run(suite)
if not result.wasSuccessful():
print(result)
assert result.wasSuccessful()
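For readers skimming the new test: doctest.DocTestSuite collects the examples in a module's docstrings and wraps each as a unittest case, and the FencedCellParser swaps doctest's default `>>>`-style parser for one that executes fenced python code blocks. A self-contained sketch of the same suite-building pattern, using a throwaway module with a plain `>>>` example (no KerasNLP presets needed):

```python
import doctest
import types
import unittest

# Build a throwaway module whose docstring carries one doctest example.
mod = types.ModuleType("demo")
mod.__doc__ = '''
Demonstrates the DocTestSuite pattern.

>>> 1 + 1
2
'''

# Same construction as above: collect docstring tests, run as a suite.
suite = doctest.DocTestSuite(
    mod,
    optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
)
result = unittest.TextTestRunner().run(suite)
assert result.wasSuccessful()
```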