 import tensorflow as tf
 from tensorflow import keras

+from keras_nlp.utils.python_utils import format_docstring

+base_sampler_args_docstring = """
+    jit_compile: bool, defaults to True. If True, XLA compilation will be used.
+    """
+
+call_args_docstring = """
+    token_probability_fn: a function that generates the probability of
+        the next token over the whole vocabulary for each input token.
+    prompt: a list of integers or an integer Tensor, can be 1D or 2D. The
+        initial tokens to append generated tokens.
+    max_length: int. The max length of generated sequence.
+    padding_mask: a tensor, defaults to None. The padding mask of the prompt.
+    end_token_id: int, defaults to None. The token marking the end of the
+        sequence, once encountered the generation is finished for the exact
+        sequence. If None, every sequence is generated up to `max_length`.
+        If set, all tokens after encountering `end_token_id` will be
+        replaced with `pad_token_id`.
+    from_logits: bool, defaults to True. Indicate if the `token_probability_fn`
+        returns logits. If False, `token_probability_fn` returns probability
+        distributions.
+    """
+
+sample_args_docstring = """
+    token_probability_fn: a function that generates the probability of
+        the next token over the whole vocabulary for each input token.
+    prompt: a dense int Tensor of shape [batch_size, max_length]. The
+        placeholder for generated sequence.
+    mask: a dense bool Tensor of shape [batch_size, max_length]. The mask of
+        prompt.
+    num_steps: int. The remaining number of tokens to generate.
+    from_logits: bool, defaults to True. Indicate if the `token_probability_fn`
+        returns logits. If False, `token_probability_fn` returns probability
+        distributions.
+    """
+
+
+@format_docstring(
+    base_sampler_args=base_sampler_args_docstring, call_args=call_args_docstring
+)
 @keras.utils.register_keras_serializable(package="keras_nlp")
 class Sampler:
     """Base sampler class.

     Args:
-        {{base_optimizer_keyword_args}}
+        {{base_sampler_args}}

     Call Args:
-        {{call_keyword_docstring}}
+        {{call_args}}

     The inputs and outputs of Sampler class are both token ids.

@@ -39,7 +78,8 @@ class Sampler:
     START_ID = 1
     END_ID = 2

-    # Create a dummy model to predict the next token.
+    # Create a dummy model to predict the next token. Note that the output is
+    # random without training; here we just demo how `samplers` works.
     model = keras.Sequential(
         [
             keras.Input(shape=[None]),
@@ -178,7 +218,8 @@ def __call__(
         from_logits=True,
     ):
         prompt, padding_mask = self._validate_prompt_and_mask(
-            prompt, padding_mask
+            prompt,
+            padding_mask,
         )

         input_is_1d = prompt.shape.rank == 1
@@ -214,13 +255,14 @@ def __call__(

         return tf.squeeze(prompt, axis=0) if input_is_1d else prompt

+    @format_docstring(sample_args=sample_args_docstring)
     def sample(
         self, token_probability_fn, prompt, mask, num_steps, from_logits=True
     ):
         """Sampling logic implementation.

         Args:
-            {{sample_keyword_docstring}}
+            {{sample_args}}

         Returns:
             A dense int Tensor, representing the generated text in token id
@@ -232,48 +274,3 @@ def get_config(self):
         return {
             "jit_compile": self.jit_compile,
         }
-
-
-base_sampler_keyword_args = """
-    jit_compile: bool, defaults to True. If True, XLA compilation will be used.
-    """
-
-call_keyword_docstring = """
-    token_probability_fn: a function that generates the probability of
-        the next token over the whole vocabulary for each input token.
-    prompt: a list of integers or an integer Tensor, can be 1D or 2D. The
-        initial tokens to append generated tokens.
-    max_length: int. The max length of generated sequence.
-    padding_mask: a tensor, defaults to None. The padding mask of the prompt.
-    end_token_id: int, defaults to None. The token marking the end of the
-        sequence, once encountered the generation is finished for the exact
-        sequence. If None, every sequence is generated up to `max_length`.
-        If set, all tokens after encountering `end_token_id` will be
-        replaced with `pad_token_id`.
-    from_logits: bool, defaults to True. Indicate if the `token_probability_fn`
-        returns logits. If False, `token_probability_fn` returns probability
-        distributions.
-    """
-
-sample_keyword_docstring = """
-    token_probability_fn: a function that generates the probability of
-        the next token over the whole vocabulary for each input token.
-    prompt: a dense int Tensor of shape [batch_size, max_length]. The
-        placeholder for generated sequence.
-    mask: a dense bool Tensor of shape [batch_size, max_length]. The mask of
-        prompt.
-    num_steps: int. The remaining number of tokens to generate.
-    from_logits: bool, defaults to True. Indicate if the `token_probability_fn`
-        returns logits. If False, `token_probability_fn` returns probability
-        distributions.
-    """
-
-Sampler.__doc__ = Sampler.__doc__.replace(
-    "{{base_sampler_keyword_args}}", base_sampler_keyword_args
-)
-Sampler.__doc__ = Sampler.__doc__.replace(
-    "{{call_keyword_docstring}}", call_keyword_docstring
-)
-Sampler.sample.__doc__ = Sampler.sample.__doc__.replace(
-    "{{sample_keyword_docstring}}", sample_keyword_docstring
-)
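
The change above swaps the module-level `__doc__.replace(...)` patching for a `format_docstring` decorator, so the template substitution happens right where the class and method are defined. A minimal sketch of that pattern, assuming the keras_nlp helper performs a plain `{{key}}` substitution like the manual `.replace` calls it supersedes (an illustrative stand-in, not the library's actual source):

# Illustrative only: assumes `format_docstring` does a simple template
# substitution over `{{key}}` placeholders. Not the keras_nlp implementation.
def format_docstring(**replacements):
    def decorator(obj):
        doc = obj.__doc__ or ""
        for key, value in replacements.items():
            # Swap each `{{key}}` placeholder for its docstring snippet.
            doc = doc.replace("{{" + key + "}}", value)
        obj.__doc__ = doc
        return obj

    return decorator


args_docstring = """
    x: int. The value to double."""


@format_docstring(args=args_docstring)
def double(x):
    """Double a value.

    Args:
        {{args}}
    """
    return 2 * x


print(double.__doc__)  # The Args section now contains the substituted text.

Applying the substitution in a decorator keeps each docstring and its placeholder definitions adjacent, instead of mutating `__doc__` at the bottom of the module, far from the class body.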