From 246c242fb18218ebbbeaa87e254722f5ddfc0c3c Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Mon, 5 Aug 2024 21:13:44 +0000 Subject: [PATCH 01/25] Agg Vgg16 backbone --- keras_nlp/src/models/VGG16/vgg_16_backbone.py | 232 ++++++++++++++++++ .../src/models/VGG16/vgg_16_backbone_test.py | 46 ++++ 2 files changed, 278 insertions(+) create mode 100644 keras_nlp/src/models/VGG16/vgg_16_backbone.py create mode 100644 keras_nlp/src/models/VGG16/vgg_16_backbone_test.py diff --git a/keras_nlp/src/models/VGG16/vgg_16_backbone.py b/keras_nlp/src/models/VGG16/vgg_16_backbone.py new file mode 100644 index 0000000000..11d5ee5dfe --- /dev/null +++ b/keras_nlp/src/models/VGG16/vgg_16_backbone.py @@ -0,0 +1,232 @@ +# Copyright 2023 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import keras +from keras import layers + +from keras_nlp.src.api_export import keras_nlp_export +from keras_nlp.src.models.backbone import Backbone + + +@keras_nlp_export("keras_nlp.models.VGG16Backbone") +class VGG16Backbone(Backbone): + """ + Reference: + - [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556) + (ICLR 2015) + This class represents Keras Backbone of VGG16 model. + Args: + include_rescaling: bool, whether to rescale the inputs. If set to + True, inputs will be passed through a `Rescaling(1/255.0)` layer. + include_top: bool, whether to include the 3 fully-connected + layers at the top of the network. If provided, num_classes must be + provided. + num_classes: int, optional number of classes to classify images into, + only to be specified if `include_top` is True. + input_shape: tuple, optional shape tuple, defaults to (224, 224, 3). + input_tensor: Tensor, optional Keras tensor (i.e. output of + `layers.Input()`) to use as image input for the model. + pooling: bool, Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional block. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classifier_activation:`str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + name: (Optional) name to pass to the model, defaults to "VGG16". + Returns: + A `keras.Model` instance. + """ # noqa: E501 + + def __init__( + self, + include_rescaling, + include_top, + input_tensor=None, + num_classes=None, + input_shape=(224, 224, 3), + pooling=None, + classifier_activation="softmax", + name="VGG16", + **kwargs, + ): + + if include_top and num_classes is None: + raise ValueError( + "If `include_top` is True, you should specify `num_classes`. " + f"Received: num_classes={num_classes}" + ) + + if include_top and pooling: + raise ValueError( + f"`pooling` must be `None` when `include_top=True`." + f"Received pooling={pooling} and include_top={include_top}. " + ) + + img_input = parse_model_inputs(input_shape, input_tensor) + x = img_input + + if include_rescaling: + x = layers.Rescaling(scale=1 / 255.0)(x) + + x = apply_vgg_block( + x=x, + num_layers=2, + filters=64, + kernel_size=(3, 3), + activation="relu", + padding="same", + max_pool=True, + name="block1", + ) + + x = apply_vgg_block( + x=x, + num_layers=2, + filters=128, + kernel_size=(3, 3), + activation="relu", + padding="same", + max_pool=True, + name="block2", + ) + + x = apply_vgg_block( + x=x, + num_layers=3, + filters=256, + kernel_size=(3, 3), + activation="relu", + padding="same", + max_pool=True, + name="block3", + ) + + x = apply_vgg_block( + x=x, + num_layers=3, + filters=512, + kernel_size=(3, 3), + activation="relu", + padding="same", + max_pool=True, + name="block4", + ) + + x = apply_vgg_block( + x=x, + num_layers=3, + filters=512, + kernel_size=(3, 3), + activation="relu", + padding="same", + max_pool=True, + name="block5", + ) + + if include_top: + x = layers.Flatten(name="flatten")(x) + x = layers.Dense(4096, activation="relu", name="fc1")(x) + x = layers.Dense(4096, activation="relu", name="fc2")(x) + x = layers.Dense( + num_classes, + activation=classifier_activation, + name="predictions", + )(x) + else: + if pooling == "avg": + x = layers.GlobalAveragePooling2D()(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D()(x) + + super().__init__(inputs=img_input, outputs=x, name=name, **kwargs) + + self.include_rescaling = include_rescaling + self.include_top = include_top + self.num_classes = num_classes + self.input_tensor = input_tensor + self.pooling = pooling + self.classifier_activation = classifier_activation + + def get_config(self): + return { + "include_rescaling": self.include_rescaling, + "include_top": self.include_top, + "name": self.name, + "input_shape": self.input_shape[1:], + "input_tensor": self.input_tensor, + "pooling": self.pooling, + "num_classes": self.num_classes, + "classifier_activation": self.classifier_activation, + "trainable": self.trainable, + } + + +def apply_vgg_block( + x, + num_layers, + filters, + kernel_size, + activation, + padding, + max_pool, + name, +): + """ + Applies VGG block + Args: + x: Tensor, input tensor to pass through network + num_layers: int, number of CNN layers in the block + filters: int, filter size of each CNN layer in block + kernel_size: int (or) tuple, kernel size for CNN layer in block + activation: str (or) callable, activation function for each CNN layer in + block + padding: str (or) callable, padding function for each CNN layer in block + max_pool: bool, whether to add MaxPooling2D layer at end of block + name: str, name of the block + + Returns: + keras.KerasTensor + """ + for num in range(1, num_layers + 1): + x = layers.Conv2D( + filters, + kernel_size, + activation=activation, + padding=padding, + name=f"{name}_conv{num}", + )(x) + if max_pool: + x = layers.MaxPooling2D((2, 2), (2, 2), name=f"{name}_pool")(x) + return x + + +def parse_model_inputs(input_shape, input_tensor, **kwargs): + if input_tensor is None: + return keras.layers.Input(shape=input_shape, **kwargs) + else: + if not keras.backend.is_keras_tensor(input_tensor): + return keras.layers.Input( + tensor=input_tensor, shape=input_shape, **kwargs + ) + else: + return input_tensor diff --git a/keras_nlp/src/models/VGG16/vgg_16_backbone_test.py b/keras_nlp/src/models/VGG16/vgg_16_backbone_test.py new file mode 100644 index 0000000000..a04cd5be1f --- /dev/null +++ b/keras_nlp/src/models/VGG16/vgg_16_backbone_test.py @@ -0,0 +1,46 @@ +# Copyright 2023 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import pytest + +from keras_nlp.src.models.VGG16 import VGG16Backbone +from keras_nlp.src.tests.test_case import TestCase + + +class VGG16BackboneTest(TestCase): + def setUp(self): + self.init_kwargs = { + "input_shape": (224, 224, 3), + "include_top": False, + "include_rescaling": False, + "pooling": "avg", + } + self.input_data = np.ones((2, 224, 224, 3), dtype="float32") + + def test_backbone_basics(self): + self.run_backbone_test( + cls=VGG16Backbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + expected_output_shape=(2, 512), + ) + + @pytest.mark.large + def test_saved_model(self): + self.run_model_saving_test( + cls=VGG16Backbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) From 25fddb0f020af46bddac772726558c890705685e Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Mon, 5 Aug 2024 21:31:51 +0000 Subject: [PATCH 02/25] update names --- keras_nlp/src/models/vgg16/__init__.py | 15 +++++++ .../vgg16_backbone.py} | 0 .../vgg16_backbone_test.py} | 4 +- keras_nlp/src/tests/test_case.py | 43 +++++++++++++++++++ 4 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 keras_nlp/src/models/vgg16/__init__.py rename keras_nlp/src/models/{VGG16/vgg_16_backbone.py => vgg16/vgg16_backbone.py} (100%) rename keras_nlp/src/models/{VGG16/vgg_16_backbone_test.py => vgg16/vgg16_backbone_test.py} (93%) diff --git a/keras_nlp/src/models/vgg16/__init__.py b/keras_nlp/src/models/vgg16/__init__.py new file mode 100644 index 0000000000..f02919ada3 --- /dev/null +++ b/keras_nlp/src/models/vgg16/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2024 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from keras_nlp.src.models.vgg16.vgg16_backbone import VGG16Backbone diff --git a/keras_nlp/src/models/VGG16/vgg_16_backbone.py b/keras_nlp/src/models/vgg16/vgg16_backbone.py similarity index 100% rename from keras_nlp/src/models/VGG16/vgg_16_backbone.py rename to keras_nlp/src/models/vgg16/vgg16_backbone.py diff --git a/keras_nlp/src/models/VGG16/vgg_16_backbone_test.py b/keras_nlp/src/models/vgg16/vgg16_backbone_test.py similarity index 93% rename from keras_nlp/src/models/VGG16/vgg_16_backbone_test.py rename to keras_nlp/src/models/vgg16/vgg16_backbone_test.py index a04cd5be1f..e820bd5cd8 100644 --- a/keras_nlp/src/models/VGG16/vgg_16_backbone_test.py +++ b/keras_nlp/src/models/vgg16/vgg16_backbone_test.py @@ -15,7 +15,7 @@ import numpy as np import pytest -from keras_nlp.src.models.VGG16 import VGG16Backbone +from keras_nlp.src.models.vgg16.vgg16_backbone import VGG16Backbone from keras_nlp.src.tests.test_case import TestCase @@ -30,7 +30,7 @@ def setUp(self): self.input_data = np.ones((2, 224, 224, 3), dtype="float32") def test_backbone_basics(self): - self.run_backbone_test( + self.run_cv_backbone_test( cls=VGG16Backbone, init_kwargs=self.init_kwargs, input_data=self.input_data, diff --git a/keras_nlp/src/tests/test_case.py b/keras_nlp/src/tests/test_case.py index 7e8e0cec95..313d7eda95 100644 --- a/keras_nlp/src/tests/test_case.py +++ b/keras_nlp/src/tests/test_case.py @@ -455,6 +455,49 @@ def run_backbone_test( if run_quantization_check and has_quantization_support(): self.run_quantization_test(backbone, cls, init_kwargs, input_data) + def run_cv_backbone_test( + self, + cls, + init_kwargs, + input_data, + expected_output_shape, + run_mixed_precision_check=True, + run_quantization_check=True, + ): + """Run basic tests for a backbone, including compilation.""" + backbone = cls(**init_kwargs) + # Check serialization (without a full save). + self.run_serialization_test(backbone) + + # Call model eagerly. + output = backbone(input_data) + if isinstance(expected_output_shape, dict): + for key in expected_output_shape: + self.assertEqual(output[key].shape, expected_output_shape[key]) + else: + self.assertEqual(output.shape, expected_output_shape) + + # Check compiled predict function. + backbone.predict(input_data) + # Convert to numpy first, torch GPU tensor -> tf.data will error. + numpy_data = tree.map_structure(ops.convert_to_numpy, input_data) + # Create a dataset. + input_dataset = tf.data.Dataset.from_tensor_slices(numpy_data).batch(2) + backbone.predict(input_dataset) + + # Check name maps to classname. + name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", cls.__name__) + name = re.sub("([a-z])([A-Z])", r"\1_\2", name).lower() + self.assertRegexpMatches(backbone.name, name) + + # Check mixed precision. + if run_mixed_precision_check: + self.run_precision_test(cls, init_kwargs, input_data) + + # Check quantization. + if run_quantization_check and has_quantization_support(): + self.run_quantization_test(backbone, cls, init_kwargs, input_data) + def run_task_test( self, cls, From 0d3414f56be83944881e39c1142ed1810f4dfcf8 Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Mon, 5 Aug 2024 21:50:06 +0000 Subject: [PATCH 03/25] update tests --- keras_nlp/src/models/vgg16/vgg16_backbone.py | 2 +- keras_nlp/src/models/vgg16/vgg16_backbone_test.py | 1 + keras_nlp/src/tests/test_case.py | 5 +++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/keras_nlp/src/models/vgg16/vgg16_backbone.py b/keras_nlp/src/models/vgg16/vgg16_backbone.py index 11d5ee5dfe..862250044f 100644 --- a/keras_nlp/src/models/vgg16/vgg16_backbone.py +++ b/keras_nlp/src/models/vgg16/vgg16_backbone.py @@ -66,7 +66,7 @@ def __init__( input_shape=(224, 224, 3), pooling=None, classifier_activation="softmax", - name="VGG16", + name="vgg16_backbone", **kwargs, ): diff --git a/keras_nlp/src/models/vgg16/vgg16_backbone_test.py b/keras_nlp/src/models/vgg16/vgg16_backbone_test.py index e820bd5cd8..6444919ee4 100644 --- a/keras_nlp/src/models/vgg16/vgg16_backbone_test.py +++ b/keras_nlp/src/models/vgg16/vgg16_backbone_test.py @@ -35,6 +35,7 @@ def test_backbone_basics(self): init_kwargs=self.init_kwargs, input_data=self.input_data, expected_output_shape=(2, 512), + run_mixed_precision_check=False, ) @pytest.mark.large diff --git a/keras_nlp/src/tests/test_case.py b/keras_nlp/src/tests/test_case.py index 313d7eda95..c7f2f851c7 100644 --- a/keras_nlp/src/tests/test_case.py +++ b/keras_nlp/src/tests/test_case.py @@ -488,11 +488,12 @@ def run_cv_backbone_test( # Check name maps to classname. name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", cls.__name__) name = re.sub("([a-z])([A-Z])", r"\1_\2", name).lower() + self.assertRegexpMatches(backbone.name, name) # Check mixed precision. - if run_mixed_precision_check: - self.run_precision_test(cls, init_kwargs, input_data) + # if run_mixed_precision_check: + # self.run_precision_test(cls, init_kwargs, input_data) # Check quantization. if run_quantization_check and has_quantization_support(): From a0c1a72b9db281bf7f50dfb9273bbfbbf12d8c4e Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Mon, 5 Aug 2024 21:50:36 +0000 Subject: [PATCH 04/25] update test --- keras_nlp/src/tests/test_case.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/keras_nlp/src/tests/test_case.py b/keras_nlp/src/tests/test_case.py index c7f2f851c7..313d7eda95 100644 --- a/keras_nlp/src/tests/test_case.py +++ b/keras_nlp/src/tests/test_case.py @@ -488,12 +488,11 @@ def run_cv_backbone_test( # Check name maps to classname. name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", cls.__name__) name = re.sub("([a-z])([A-Z])", r"\1_\2", name).lower() - self.assertRegexpMatches(backbone.name, name) # Check mixed precision. - # if run_mixed_precision_check: - # self.run_precision_test(cls, init_kwargs, input_data) + if run_mixed_precision_check: + self.run_precision_test(cls, init_kwargs, input_data) # Check quantization. if run_quantization_check and has_quantization_support(): From 3d57f7362adb7ac840a74c7e7d7a0a1054900314 Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Mon, 5 Aug 2024 22:38:38 +0000 Subject: [PATCH 05/25] add image classifier --- .../models/vgg16/vgg16_image_classifier.py | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 keras_nlp/src/models/vgg16/vgg16_image_classifier.py diff --git a/keras_nlp/src/models/vgg16/vgg16_image_classifier.py b/keras_nlp/src/models/vgg16/vgg16_image_classifier.py new file mode 100644 index 0000000000..e395fbc21c --- /dev/null +++ b/keras_nlp/src/models/vgg16/vgg16_image_classifier.py @@ -0,0 +1,116 @@ +# Copyright 2024 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Image classifier model using pooling and dense layers.""" +import keras + +from keras_nlp.src.api_export import keras_nlp_export +from keras_nlp.src.models.classifier import Classifier +from keras_nlp.src.models.vgg16.vgg16_backbone import VGG16Backbone + + +@keras_nlp_export("keras_nlp.models.VGG16ImageClassifier") +class ImageClassifier(Classifier): + """Image classifier with pooling and dense layer prediction head. + + Args: + backbone: `keras.Model` instance, the backbone architecture of the + classifier called on the inputs. Pooling will be called on the last + dimension of the backbone output. + num_classes: int, number of classes to predict. + pooling: str, type of pooling layer. Must be one of "avg", "max". + activation: Optional `str` or callable, defaults to "softmax". The + activation function to use on the Dense layer. Set `activation=None` + to return the output logits. + + Example: + ```python + input_data = tf.ones(shape=(8, 224, 224, 3)) + + # Pretrained classifier (e.g., for imagenet categories) + model = keras_cv.models.ImageClassifier.from_preset( + "resnet50_v2_imagenet_classifier", + ) + output = model(input_data) + + # Pretrained backbone + backbone = keras_cv.models.ResNet50V2Backbone.from_preset( + "resnet50_v2_imagenet", + ) + model = keras_cv.models.ImageClassifier( + backbone=backbone, + num_classes=4, + ) + output = model(input_data) + + # Randomly initialized backbone with a custom config + model = keras_cv.models.ImageClassifier( + backbone=keras_cv.models.ResNet50V2Backbone(), + num_classes=4, + ) + output = model(input_data) + ``` + """ + + backbone_cls = VGG16Backbone + + def __init__( + self, + backbone, + num_classes, + pooling="avg", + activation="softmax", + **kwargs, + ): + self.backbone = backbone + if pooling == "avg": + pooling_layer = keras.layers.GlobalAveragePooling2D(name="avg_pool") + elif pooling == "max": + pooling_layer = keras.layers.GlobalMaxPooling2D(name="max_pool") + else: + raise ValueError( + f'`pooling` must be one of "avg", "max". Received: {pooling}.' + ) + inputs = backbone.input + x = backbone(inputs) + x = pooling_layer(x) + outputs = keras.layers.Dense( + num_classes, + activation=activation, + name="predictions", + )(x) + + # Instantiate using Functional API Model constructor + super().__init__( + inputs=inputs, + outputs=outputs, + **kwargs, + ) + # All references to `self` below this line + self.num_classes = num_classes + self.pooling = pooling + self.activation = activation + + def get_config(self): + # Backbone serialized in `super` + config = super().get_config() + config.update( + { + "backbone": keras.layers.serialize(self.backbone), + "num_classes": self.num_classes, + "pooling": self.pooling, + "activation": self.activation, + } + ) + return config From c23d5731aeceab10d3ed6d5ca57014fb0e2e45d2 Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Tue, 6 Aug 2024 20:27:04 +0000 Subject: [PATCH 06/25] incorporate review comments --- keras_nlp/src/models/vgg16/vgg16_backbone.py | 88 +++---------------- .../src/models/vgg16/vgg16_backbone_test.py | 1 - 2 files changed, 14 insertions(+), 75 deletions(-) diff --git a/keras_nlp/src/models/vgg16/vgg16_backbone.py b/keras_nlp/src/models/vgg16/vgg16_backbone.py index 862250044f..1896a9da56 100644 --- a/keras_nlp/src/models/vgg16/vgg16_backbone.py +++ b/keras_nlp/src/models/vgg16/vgg16_backbone.py @@ -21,21 +21,12 @@ @keras_nlp_export("keras_nlp.models.VGG16Backbone") class VGG16Backbone(Backbone): """ - Reference: - - [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556) - (ICLR 2015) This class represents Keras Backbone of VGG16 model. + Args: include_rescaling: bool, whether to rescale the inputs. If set to True, inputs will be passed through a `Rescaling(1/255.0)` layer. - include_top: bool, whether to include the 3 fully-connected - layers at the top of the network. If provided, num_classes must be - provided. - num_classes: int, optional number of classes to classify images into, - only to be specified if `include_top` is True. input_shape: tuple, optional shape tuple, defaults to (224, 224, 3). - input_tensor: Tensor, optional Keras tensor (i.e. output of - `layers.Input()`) to use as image input for the model. pooling: bool, Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be @@ -47,42 +38,22 @@ class VGG16Backbone(Backbone): the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. - classifier_activation:`str` or callable. The activation function to use - on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. - When loading pretrained weights, `classifier_activation` can only - be `None` or `"softmax"`. - name: (Optional) name to pass to the model, defaults to "VGG16". - Returns: - A `keras.Model` instance. + + Reference: + - [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556) + (ICLR 2015) """ # noqa: E501 def __init__( self, include_rescaling, - include_top, - input_tensor=None, - num_classes=None, input_shape=(224, 224, 3), pooling=None, - classifier_activation="softmax", - name="vgg16_backbone", **kwargs, ): - if include_top and num_classes is None: - raise ValueError( - "If `include_top` is True, you should specify `num_classes`. " - f"Received: num_classes={num_classes}" - ) - - if include_top and pooling: - raise ValueError( - f"`pooling` must be `None` when `include_top=True`." - f"Received pooling={pooling} and include_top={include_top}. " - ) - - img_input = parse_model_inputs(input_shape, input_tensor) + # === Functional Model === + img_input = keras.layers.Input(shape=input_shape) x = img_input if include_rescaling: @@ -143,40 +114,21 @@ def __init__( name="block5", ) - if include_top: - x = layers.Flatten(name="flatten")(x) - x = layers.Dense(4096, activation="relu", name="fc1")(x) - x = layers.Dense(4096, activation="relu", name="fc2")(x) - x = layers.Dense( - num_classes, - activation=classifier_activation, - name="predictions", - )(x) - else: - if pooling == "avg": - x = layers.GlobalAveragePooling2D()(x) - elif pooling == "max": - x = layers.GlobalMaxPooling2D()(x) - - super().__init__(inputs=img_input, outputs=x, name=name, **kwargs) + if pooling == "avg": + x = layers.GlobalAveragePooling2D()(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D()(x) + super().__init__(inputs=img_input, outputs=x, **kwargs) + + # === Config === self.include_rescaling = include_rescaling - self.include_top = include_top - self.num_classes = num_classes - self.input_tensor = input_tensor self.pooling = pooling - self.classifier_activation = classifier_activation def get_config(self): return { "include_rescaling": self.include_rescaling, - "include_top": self.include_top, - "name": self.name, "input_shape": self.input_shape[1:], - "input_tensor": self.input_tensor, - "pooling": self.pooling, - "num_classes": self.num_classes, - "classifier_activation": self.classifier_activation, "trainable": self.trainable, } @@ -218,15 +170,3 @@ def apply_vgg_block( if max_pool: x = layers.MaxPooling2D((2, 2), (2, 2), name=f"{name}_pool")(x) return x - - -def parse_model_inputs(input_shape, input_tensor, **kwargs): - if input_tensor is None: - return keras.layers.Input(shape=input_shape, **kwargs) - else: - if not keras.backend.is_keras_tensor(input_tensor): - return keras.layers.Input( - tensor=input_tensor, shape=input_shape, **kwargs - ) - else: - return input_tensor diff --git a/keras_nlp/src/models/vgg16/vgg16_backbone_test.py b/keras_nlp/src/models/vgg16/vgg16_backbone_test.py index 6444919ee4..c46bba3d36 100644 --- a/keras_nlp/src/models/vgg16/vgg16_backbone_test.py +++ b/keras_nlp/src/models/vgg16/vgg16_backbone_test.py @@ -23,7 +23,6 @@ class VGG16BackboneTest(TestCase): def setUp(self): self.init_kwargs = { "input_shape": (224, 224, 3), - "include_top": False, "include_rescaling": False, "pooling": "avg", } From 0e73b6fe1ee97eb15190d9fd33f51def4a90188c Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Tue, 6 Aug 2024 22:34:47 +0000 Subject: [PATCH 07/25] Update test case --- .../src/models/vgg16/vgg16_backbone_test.py | 3 +- keras_nlp/src/tests/test_case.py | 44 +------------------ 2 files changed, 3 insertions(+), 44 deletions(-) diff --git a/keras_nlp/src/models/vgg16/vgg16_backbone_test.py b/keras_nlp/src/models/vgg16/vgg16_backbone_test.py index c46bba3d36..1324f1f0d5 100644 --- a/keras_nlp/src/models/vgg16/vgg16_backbone_test.py +++ b/keras_nlp/src/models/vgg16/vgg16_backbone_test.py @@ -29,12 +29,13 @@ def setUp(self): self.input_data = np.ones((2, 224, 224, 3), dtype="float32") def test_backbone_basics(self): - self.run_cv_backbone_test( + self.run_backbone_test( cls=VGG16Backbone, init_kwargs=self.init_kwargs, input_data=self.input_data, expected_output_shape=(2, 512), run_mixed_precision_check=False, + is_cv_backbone=True, ) @pytest.mark.large diff --git a/keras_nlp/src/tests/test_case.py b/keras_nlp/src/tests/test_case.py index 313d7eda95..9a8b8748a5 100644 --- a/keras_nlp/src/tests/test_case.py +++ b/keras_nlp/src/tests/test_case.py @@ -406,6 +406,7 @@ def run_backbone_test( variable_length_data=None, run_mixed_precision_check=True, run_quantization_check=True, + is_cv_backbone=False, ): """Run basic tests for a backbone, including compilation.""" backbone = cls(**init_kwargs) @@ -455,49 +456,6 @@ def run_backbone_test( if run_quantization_check and has_quantization_support(): self.run_quantization_test(backbone, cls, init_kwargs, input_data) - def run_cv_backbone_test( - self, - cls, - init_kwargs, - input_data, - expected_output_shape, - run_mixed_precision_check=True, - run_quantization_check=True, - ): - """Run basic tests for a backbone, including compilation.""" - backbone = cls(**init_kwargs) - # Check serialization (without a full save). - self.run_serialization_test(backbone) - - # Call model eagerly. - output = backbone(input_data) - if isinstance(expected_output_shape, dict): - for key in expected_output_shape: - self.assertEqual(output[key].shape, expected_output_shape[key]) - else: - self.assertEqual(output.shape, expected_output_shape) - - # Check compiled predict function. - backbone.predict(input_data) - # Convert to numpy first, torch GPU tensor -> tf.data will error. - numpy_data = tree.map_structure(ops.convert_to_numpy, input_data) - # Create a dataset. - input_dataset = tf.data.Dataset.from_tensor_slices(numpy_data).batch(2) - backbone.predict(input_dataset) - - # Check name maps to classname. - name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", cls.__name__) - name = re.sub("([a-z])([A-Z])", r"\1_\2", name).lower() - self.assertRegexpMatches(backbone.name, name) - - # Check mixed precision. - if run_mixed_precision_check: - self.run_precision_test(cls, init_kwargs, input_data) - - # Check quantization. - if run_quantization_check and has_quantization_support(): - self.run_quantization_test(backbone, cls, init_kwargs, input_data) - def run_task_test( self, cls, From fac566fab2c6cacc1f4f98b53c78c5d7b0660699 Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Tue, 6 Aug 2024 22:37:05 +0000 Subject: [PATCH 08/25] update backbone test --- keras_nlp/src/tests/test_case.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/keras_nlp/src/tests/test_case.py b/keras_nlp/src/tests/test_case.py index 9a8b8748a5..4ec9efecd2 100644 --- a/keras_nlp/src/tests/test_case.py +++ b/keras_nlp/src/tests/test_case.py @@ -420,20 +420,22 @@ def run_backbone_test( self.assertEqual(output[key].shape, expected_output_shape[key]) else: self.assertEqual(output.shape, expected_output_shape) - - # Check we can embed tokens eagerly. - output = backbone.token_embedding(ops.zeros((2, 3), dtype="int32")) - - # Check variable length sequences. - if variable_length_data is None: - # If no variable length data passed, assume the second axis of all - # inputs is our sequence axis and create it ourselves. - variable_length_data = [ - tree.map_structure(lambda x: x[:, :seq_length, ...], input_data) - for seq_length in (2, 3, 4) - ] - for batch in variable_length_data: - backbone(batch) + if not is_cv_backbone: + # Check we can embed tokens eagerly. + output = backbone.token_embedding(ops.zeros((2, 3), dtype="int32")) + + # Check variable length sequences. + if variable_length_data is None: + # If no variable length data passed, assume the second axis of all + # inputs is our sequence axis and create it ourselves. + variable_length_data = [ + tree.map_structure( + lambda x: x[:, :seq_length, ...], input_data + ) + for seq_length in (2, 3, 4) + ] + for batch in variable_length_data: + backbone(batch) # Check compiled predict function. backbone.predict(input_data) From eef44057aff63a9464ddf65337137942ac6a089b Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Tue, 6 Aug 2024 23:19:06 +0000 Subject: [PATCH 09/25] add image classifier --- keras_nlp/src/models/image_classifier.py | 144 ++++++++++++++++++ .../models/vgg16/vgg16_image_classifier.py | 116 -------------- 2 files changed, 144 insertions(+), 116 deletions(-) create mode 100644 keras_nlp/src/models/image_classifier.py delete mode 100644 keras_nlp/src/models/vgg16/vgg16_image_classifier.py diff --git a/keras_nlp/src/models/image_classifier.py b/keras_nlp/src/models/image_classifier.py new file mode 100644 index 0000000000..72533cf6b3 --- /dev/null +++ b/keras_nlp/src/models/image_classifier.py @@ -0,0 +1,144 @@ +# Copyright 2023 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import keras + +from keras_nlp.src.api_export import keras_nlp_export +from keras_nlp.src.models.task import Task + + +@keras_nlp_export("keras_nlp.models.ImageClassifier") +class ImageClassifier(Task): + """Base class for all image classification tasks. + + `Classifier` tasks wrap a `keras_nlp.models.Backbone` and + a `keras_nlp.models.Preprocessor` to create a model that can be used for + image classification. + + Args: + backbone: `keras.Model` instance, the backbone architecture of the + classifier called on the inputs. Pooling will be called on the last + dimension of the backbone output. + num_classes: int, number of classes to predict. + pooling: str, type of pooling layer. Must be one of "avg", "max". + activation: Optional `str` or callable, defaults to "softmax". The + activation function to use on the Dense layer. Set `activation=None` + to return the output logits. + + To fine-tune with `fit()`, pass a dataset containing tuples of `(x, y)` + labels where `x` is a string and `y` is a integer from `[0, num_classes)`. + + All `Classifier` tasks include a `from_preset()` constructor which can be + used to load a pre-trained config and weights. + """ + + def __init__( + self, + backbone, + num_classes, + pooling="avg", + activation="softmax", + **kwargs, + ): + if pooling == "avg": + pooling_layer = keras.layers.GlobalAveragePooling2D(name="avg_pool") + elif pooling == "max": + pooling_layer = keras.layers.GlobalMaxPooling2D(name="max_pool") + else: + raise ValueError( + f'`pooling` must be one of "avg", "max". Received: {pooling}.' + ) + inputs = backbone.input + x = backbone(inputs) + x = pooling_layer(x) + outputs = keras.layers.Dense( + num_classes, + activation=activation, + name="predictions", + )(x) + + # Instantiate using Functional API Model constructor + super().__init__( + inputs=inputs, + outputs=outputs, + **kwargs, + ) + # All references to `self` below this line + self.backbone = backbone + self.num_classes = num_classes + self.pooling = pooling + self.activation = activation + + def get_config(self): + # Backbone serialized in `super` + config = super().get_config() + config.update( + { + "backbone": keras.layers.serialize(self.backbone), + "num_classes": self.num_classes, + "pooling": self.pooling, + "activation": self.activation, + } + ) + return config + + def compile( + self, + optimizer="auto", + loss="auto", + *, + metrics="auto", + **kwargs, + ): + """Configures the `ImageClassifier` task for training. + + The `Classifier` task extends the default compilation signature of + `keras.Model.compile` with defaults for `optimizer`, `loss`, and + `metrics`. To override these defaults, pass any value + to these arguments during compilation. + + Args: + optimizer: `"auto"`, an optimizer name, or a `keras.Optimizer` + instance. Defaults to `"auto"`, which uses the default optimizer + for the given model and task. See `keras.Model.compile` and + `keras.optimizers` for more info on possible `optimizer` values. + loss: `"auto"`, a loss name, or a `keras.losses.Loss` instance. + Defaults to `"auto"`, where a + `keras.losses.SparseCategoricalCrossentropy` loss will be + applied for the classification task. See + `keras.Model.compile` and `keras.losses` for more info on + possible `loss` values. + metrics: `"auto"`, or a list of metrics to be evaluated by + the model during training and testing. Defaults to `"auto"`, + where a `keras.metrics.SparseCategoricalAccuracy` will be + applied to track the accuracy of the model during training. + See `keras.Model.compile` and `keras.metrics` for + more info on possible `metrics` values. + **kwargs: See `keras.Model.compile` for a full list of arguments + supported by the compile method. + """ + if optimizer == "auto": + optimizer = keras.optimizers.Adam(5e-5) + if loss == "auto": + activation = getattr(self, "activation", None) + activation = keras.activations.get(activation) + from_logits = activation != keras.activations.softmax + loss = keras.losses.SparseCategoricalCrossentropy(from_logits) + if metrics == "auto": + metrics = [keras.metrics.SparseCategoricalAccuracy()] + super().compile( + optimizer=optimizer, + loss=loss, + metrics=metrics, + **kwargs, + ) diff --git a/keras_nlp/src/models/vgg16/vgg16_image_classifier.py b/keras_nlp/src/models/vgg16/vgg16_image_classifier.py deleted file mode 100644 index e395fbc21c..0000000000 --- a/keras_nlp/src/models/vgg16/vgg16_image_classifier.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright 2024 The KerasNLP Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Image classifier model using pooling and dense layers.""" -import keras - -from keras_nlp.src.api_export import keras_nlp_export -from keras_nlp.src.models.classifier import Classifier -from keras_nlp.src.models.vgg16.vgg16_backbone import VGG16Backbone - - -@keras_nlp_export("keras_nlp.models.VGG16ImageClassifier") -class ImageClassifier(Classifier): - """Image classifier with pooling and dense layer prediction head. - - Args: - backbone: `keras.Model` instance, the backbone architecture of the - classifier called on the inputs. Pooling will be called on the last - dimension of the backbone output. - num_classes: int, number of classes to predict. - pooling: str, type of pooling layer. Must be one of "avg", "max". - activation: Optional `str` or callable, defaults to "softmax". The - activation function to use on the Dense layer. Set `activation=None` - to return the output logits. - - Example: - ```python - input_data = tf.ones(shape=(8, 224, 224, 3)) - - # Pretrained classifier (e.g., for imagenet categories) - model = keras_cv.models.ImageClassifier.from_preset( - "resnet50_v2_imagenet_classifier", - ) - output = model(input_data) - - # Pretrained backbone - backbone = keras_cv.models.ResNet50V2Backbone.from_preset( - "resnet50_v2_imagenet", - ) - model = keras_cv.models.ImageClassifier( - backbone=backbone, - num_classes=4, - ) - output = model(input_data) - - # Randomly initialized backbone with a custom config - model = keras_cv.models.ImageClassifier( - backbone=keras_cv.models.ResNet50V2Backbone(), - num_classes=4, - ) - output = model(input_data) - ``` - """ - - backbone_cls = VGG16Backbone - - def __init__( - self, - backbone, - num_classes, - pooling="avg", - activation="softmax", - **kwargs, - ): - self.backbone = backbone - if pooling == "avg": - pooling_layer = keras.layers.GlobalAveragePooling2D(name="avg_pool") - elif pooling == "max": - pooling_layer = keras.layers.GlobalMaxPooling2D(name="max_pool") - else: - raise ValueError( - f'`pooling` must be one of "avg", "max". Received: {pooling}.' - ) - inputs = backbone.input - x = backbone(inputs) - x = pooling_layer(x) - outputs = keras.layers.Dense( - num_classes, - activation=activation, - name="predictions", - )(x) - - # Instantiate using Functional API Model constructor - super().__init__( - inputs=inputs, - outputs=outputs, - **kwargs, - ) - # All references to `self` below this line - self.num_classes = num_classes - self.pooling = pooling - self.activation = activation - - def get_config(self): - # Backbone serialized in `super` - config = super().get_config() - config.update( - { - "backbone": keras.layers.serialize(self.backbone), - "num_classes": self.num_classes, - "pooling": self.pooling, - "activation": self.activation, - } - ) - return config From 0c481ef6c629a2463cf08c04892129c0ad1ba323 Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Tue, 6 Aug 2024 23:27:44 +0000 Subject: [PATCH 10/25] classifier cleanup --- keras_nlp/src/models/image_classifier.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/keras_nlp/src/models/image_classifier.py b/keras_nlp/src/models/image_classifier.py index 72533cf6b3..6dbffe7364 100644 --- a/keras_nlp/src/models/image_classifier.py +++ b/keras_nlp/src/models/image_classifier.py @@ -50,6 +50,7 @@ def __init__( activation="softmax", **kwargs, ): + # === Layers === if pooling == "avg": pooling_layer = keras.layers.GlobalAveragePooling2D(name="avg_pool") elif pooling == "max": @@ -58,6 +59,7 @@ def __init__( raise ValueError( f'`pooling` must be one of "avg", "max". Received: {pooling}.' ) + # === Functional Model === inputs = backbone.input x = backbone(inputs) x = pooling_layer(x) @@ -73,7 +75,7 @@ def __init__( outputs=outputs, **kwargs, ) - # All references to `self` below this line + # === Config === self.backbone = backbone self.num_classes = num_classes self.pooling = pooling From 5206dc9c4221d24388a50ad0fcac5632a6507a36 Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Tue, 6 Aug 2024 23:29:52 +0000 Subject: [PATCH 11/25] code reformat --- keras_nlp/src/models/image_classifier.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/keras_nlp/src/models/image_classifier.py b/keras_nlp/src/models/image_classifier.py index 6dbffe7364..961e0a7666 100644 --- a/keras_nlp/src/models/image_classifier.py +++ b/keras_nlp/src/models/image_classifier.py @@ -21,7 +21,7 @@ class ImageClassifier(Task): """Base class for all image classification tasks. - `Classifier` tasks wrap a `keras_nlp.models.Backbone` and + `ImageClassifier` tasks wrap a `keras_nlp.models.Backbone` and a `keras_nlp.models.Preprocessor` to create a model that can be used for image classification. @@ -38,7 +38,7 @@ class ImageClassifier(Task): To fine-tune with `fit()`, pass a dataset containing tuples of `(x, y)` labels where `x` is a string and `y` is a integer from `[0, num_classes)`. - All `Classifier` tasks include a `from_preset()` constructor which can be + All `ImageClassifier` tasks include a `from_preset()` constructor which can be used to load a pre-trained config and weights. """ @@ -104,7 +104,7 @@ def compile( ): """Configures the `ImageClassifier` task for training. - The `Classifier` task extends the default compilation signature of + The `ImageClassifier` task extends the default compilation signature of `keras.Model.compile` with defaults for `optimizer`, `loss`, and `metrics`. To override these defaults, pass any value to these arguments during compilation. From 1bcd5b2a2da0dac57a067aff454ecf796d8bda2b Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Tue, 6 Aug 2024 23:43:04 +0000 Subject: [PATCH 12/25] add vgg16 image classifier --- keras_nlp/src/models/image_classifier.py | 68 ++----------- .../models/vgg16/vgg16_image_classifier.py | 96 +++++++++++++++++++ 2 files changed, 102 insertions(+), 62 deletions(-) create mode 100644 keras_nlp/src/models/vgg16/vgg16_image_classifier.py diff --git a/keras_nlp/src/models/image_classifier.py b/keras_nlp/src/models/image_classifier.py index 961e0a7666..f0cc031dbc 100644 --- a/keras_nlp/src/models/image_classifier.py +++ b/keras_nlp/src/models/image_classifier.py @@ -23,17 +23,8 @@ class ImageClassifier(Task): `ImageClassifier` tasks wrap a `keras_nlp.models.Backbone` and a `keras_nlp.models.Preprocessor` to create a model that can be used for - image classification. - - Args: - backbone: `keras.Model` instance, the backbone architecture of the - classifier called on the inputs. Pooling will be called on the last - dimension of the backbone output. - num_classes: int, number of classes to predict. - pooling: str, type of pooling layer. Must be one of "avg", "max". - activation: Optional `str` or callable, defaults to "softmax". The - activation function to use on the Dense layer. Set `activation=None` - to return the output logits. + image classification. `ImageClassifier` tasks take an additional + `num_classes` argument, controlling the number of predicted output classes. To fine-tune with `fit()`, pass a dataset containing tuples of `(x, y)` labels where `x` is a string and `y` is a integer from `[0, num_classes)`. @@ -42,57 +33,10 @@ class ImageClassifier(Task): used to load a pre-trained config and weights. """ - def __init__( - self, - backbone, - num_classes, - pooling="avg", - activation="softmax", - **kwargs, - ): - # === Layers === - if pooling == "avg": - pooling_layer = keras.layers.GlobalAveragePooling2D(name="avg_pool") - elif pooling == "max": - pooling_layer = keras.layers.GlobalMaxPooling2D(name="max_pool") - else: - raise ValueError( - f'`pooling` must be one of "avg", "max". Received: {pooling}.' - ) - # === Functional Model === - inputs = backbone.input - x = backbone(inputs) - x = pooling_layer(x) - outputs = keras.layers.Dense( - num_classes, - activation=activation, - name="predictions", - )(x) - - # Instantiate using Functional API Model constructor - super().__init__( - inputs=inputs, - outputs=outputs, - **kwargs, - ) - # === Config === - self.backbone = backbone - self.num_classes = num_classes - self.pooling = pooling - self.activation = activation - - def get_config(self): - # Backbone serialized in `super` - config = super().get_config() - config.update( - { - "backbone": keras.layers.serialize(self.backbone), - "num_classes": self.num_classes, - "pooling": self.pooling, - "activation": self.activation, - } - ) - return config + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # Default compilation. + self.compile() def compile( self, diff --git a/keras_nlp/src/models/vgg16/vgg16_image_classifier.py b/keras_nlp/src/models/vgg16/vgg16_image_classifier.py new file mode 100644 index 0000000000..9c8ee4a17a --- /dev/null +++ b/keras_nlp/src/models/vgg16/vgg16_image_classifier.py @@ -0,0 +1,96 @@ +# Copyright 2023 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import keras + +from keras_nlp.src.models.image_classifier import ImageClassifier +from keras_nlp.src.models.vgg16 import VGG16Backbone + + +class VGG16ImageClassifier(ImageClassifier): + """Base class for all image classification tasks. + + `ImageClassifier` tasks wrap a `keras_nlp.models.Backbone` and + a `keras_nlp.models.Preprocessor` to create a model that can be used for + image classification. + + Args: + backbone: `keras.Model` instance, the backbone architecture of the + classifier called on the inputs. Pooling will be called on the last + dimension of the backbone output. + num_classes: int, number of classes to predict. + pooling: str, type of pooling layer. Must be one of "avg", "max". + activation: Optional `str` or callable, defaults to "softmax". The + activation function to use on the Dense layer. Set `activation=None` + to return the output logits. + + To fine-tune with `fit()`, pass a dataset containing tuples of `(x, y)` + labels where `x` is a string and `y` is a integer from `[0, num_classes)`. + + All `ImageClassifier` tasks include a `from_preset()` constructor which can be + used to load a pre-trained config and weights. + """ + + backbone_cls = VGG16Backbone + + def __init__( + self, + backbone, + num_classes, + pooling="avg", + activation="softmax", + **kwargs, + ): + # === Layers === + if pooling == "avg": + pooling_layer = keras.layers.GlobalAveragePooling2D(name="avg_pool") + elif pooling == "max": + pooling_layer = keras.layers.GlobalMaxPooling2D(name="max_pool") + else: + raise ValueError( + f'`pooling` must be one of "avg", "max". Received: {pooling}.' + ) + # === Functional Model === + inputs = backbone.input + x = backbone(inputs) + x = pooling_layer(x) + outputs = keras.layers.Dense( + num_classes, + activation=activation, + name="predictions", + )(x) + + # Instantiate using Functional API Model constructor + super().__init__( + inputs=inputs, + outputs=outputs, + **kwargs, + ) + # === Config === + self.backbone = backbone + self.num_classes = num_classes + self.pooling = pooling + self.activation = activation + + def get_config(self): + # Backbone serialized in `super` + config = super().get_config() + config.update( + { + "backbone": keras.layers.serialize(self.backbone), + "num_classes": self.num_classes, + "pooling": self.pooling, + "activation": self.activation, + } + ) + return config From a8b4bf246b595ebea788a5f25035ac69c23db042 Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Wed, 7 Aug 2024 18:53:06 +0000 Subject: [PATCH 13/25] make vgg generic --- .../src/models/{vgg16 => vgg}/__init__.py | 2 +- .../vgg16_backbone.py => vgg/vgg_backbone.py} | 72 ++++++------------- .../vgg_backbone_test.py} | 9 +-- .../vgg_image_classifier.py} | 34 ++++----- 4 files changed, 46 insertions(+), 71 deletions(-) rename keras_nlp/src/models/{vgg16 => vgg}/__init__.py (89%) rename keras_nlp/src/models/{vgg16/vgg16_backbone.py => vgg/vgg_backbone.py} (77%) rename keras_nlp/src/models/{vgg16/vgg16_backbone_test.py => vgg/vgg_backbone_test.py} (86%) rename keras_nlp/src/models/{vgg16/vgg16_image_classifier.py => vgg/vgg_image_classifier.py} (81%) diff --git a/keras_nlp/src/models/vgg16/__init__.py b/keras_nlp/src/models/vgg/__init__.py similarity index 89% rename from keras_nlp/src/models/vgg16/__init__.py rename to keras_nlp/src/models/vgg/__init__.py index f02919ada3..b84103ad36 100644 --- a/keras_nlp/src/models/vgg16/__init__.py +++ b/keras_nlp/src/models/vgg/__init__.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from keras_nlp.src.models.vgg16.vgg16_backbone import VGG16Backbone +from keras_nlp.src.models.vgg.vgg_backbone import VGGBackbone diff --git a/keras_nlp/src/models/vgg16/vgg16_backbone.py b/keras_nlp/src/models/vgg/vgg_backbone.py similarity index 77% rename from keras_nlp/src/models/vgg16/vgg16_backbone.py rename to keras_nlp/src/models/vgg/vgg_backbone.py index 1896a9da56..e7b6563695 100644 --- a/keras_nlp/src/models/vgg16/vgg16_backbone.py +++ b/keras_nlp/src/models/vgg/vgg_backbone.py @@ -18,12 +18,15 @@ from keras_nlp.src.models.backbone import Backbone -@keras_nlp_export("keras_nlp.models.VGG16Backbone") -class VGG16Backbone(Backbone): +@keras_nlp_export("keras_nlp.models.VGGBackbone") +class VGGBackbone(Backbone): """ This class represents Keras Backbone of VGG16 model. Args: + stackwise_num_repeats: list of ints, number of repeated convolutional + blocks per dense block. For VGG16 this is [2, 2, 3, 3, 3] and for + VGG19 this is [2, 2, 4, 4, 4]. include_rescaling: bool, whether to rescale the inputs. If set to True, inputs will be passed through a `Rescaling(1/255.0)` layer. input_shape: tuple, optional shape tuple, defaults to (224, 224, 3). @@ -46,6 +49,7 @@ class VGG16Backbone(Backbone): def __init__( self, + stackwise_num_repeats, include_rescaling, input_shape=(224, 224, 3), pooling=None, @@ -58,60 +62,28 @@ def __init__( if include_rescaling: x = layers.Rescaling(scale=1 / 255.0)(x) - - x = apply_vgg_block( - x=x, - num_layers=2, - filters=64, - kernel_size=(3, 3), - activation="relu", - padding="same", - max_pool=True, - name="block1", - ) - - x = apply_vgg_block( - x=x, - num_layers=2, - filters=128, - kernel_size=(3, 3), - activation="relu", - padding="same", - max_pool=True, - name="block2", - ) - - x = apply_vgg_block( - x=x, - num_layers=3, - filters=256, - kernel_size=(3, 3), - activation="relu", - padding="same", - max_pool=True, - name="block3", - ) - - x = apply_vgg_block( - x=x, - num_layers=3, - filters=512, - kernel_size=(3, 3), - activation="relu", - padding="same", - max_pool=True, - name="block4", - ) - + filters_size = 64 + for stack_index in range(len(stackwise_num_repeats) - 1): + x = apply_vgg_block( + x=x, + num_layers=stackwise_num_repeats[stack_index], + filters=filters_size, + kernel_size=(3, 3), + activation="relu", + padding="same", + max_pool=True, + name=f"block{stack_index + 1}", + ) + filters_size = filters_size * 2 x = apply_vgg_block( x=x, - num_layers=3, + num_layers=stackwise_num_repeats[-1], filters=512, kernel_size=(3, 3), activation="relu", padding="same", max_pool=True, - name="block5", + name=f"block{len(stackwise_num_repeats)}", ) if pooling == "avg": @@ -124,9 +96,11 @@ def __init__( # === Config === self.include_rescaling = include_rescaling self.pooling = pooling + self.stackwise_num_repeats = stackwise_num_repeats def get_config(self): return { + "stackwise_num_repeats": self.stackwise_num_repeats, "include_rescaling": self.include_rescaling, "input_shape": self.input_shape[1:], "trainable": self.trainable, diff --git a/keras_nlp/src/models/vgg16/vgg16_backbone_test.py b/keras_nlp/src/models/vgg/vgg_backbone_test.py similarity index 86% rename from keras_nlp/src/models/vgg16/vgg16_backbone_test.py rename to keras_nlp/src/models/vgg/vgg_backbone_test.py index 1324f1f0d5..428d270442 100644 --- a/keras_nlp/src/models/vgg16/vgg16_backbone_test.py +++ b/keras_nlp/src/models/vgg/vgg_backbone_test.py @@ -15,13 +15,14 @@ import numpy as np import pytest -from keras_nlp.src.models.vgg16.vgg16_backbone import VGG16Backbone +from keras_nlp.src.models.vgg.vgg_backbone import VGGBackbone from keras_nlp.src.tests.test_case import TestCase -class VGG16BackboneTest(TestCase): +class VGGBackboneTest(TestCase): def setUp(self): self.init_kwargs = { + "stackwise_num_repeats": [2, 2, 3, 3, 3], "input_shape": (224, 224, 3), "include_rescaling": False, "pooling": "avg", @@ -30,7 +31,7 @@ def setUp(self): def test_backbone_basics(self): self.run_backbone_test( - cls=VGG16Backbone, + cls=VGGBackbone, init_kwargs=self.init_kwargs, input_data=self.input_data, expected_output_shape=(2, 512), @@ -41,7 +42,7 @@ def test_backbone_basics(self): @pytest.mark.large def test_saved_model(self): self.run_model_saving_test( - cls=VGG16Backbone, + cls=VGGBackbone, init_kwargs=self.init_kwargs, input_data=self.input_data, ) diff --git a/keras_nlp/src/models/vgg16/vgg16_image_classifier.py b/keras_nlp/src/models/vgg/vgg_image_classifier.py similarity index 81% rename from keras_nlp/src/models/vgg16/vgg16_image_classifier.py rename to keras_nlp/src/models/vgg/vgg_image_classifier.py index 9c8ee4a17a..9cfd67335d 100644 --- a/keras_nlp/src/models/vgg16/vgg16_image_classifier.py +++ b/keras_nlp/src/models/vgg/vgg_image_classifier.py @@ -14,15 +14,11 @@ import keras from keras_nlp.src.models.image_classifier import ImageClassifier -from keras_nlp.src.models.vgg16 import VGG16Backbone +from keras_nlp.src.models.vgg import VGGBackbone class VGG16ImageClassifier(ImageClassifier): - """Base class for all image classification tasks. - - `ImageClassifier` tasks wrap a `keras_nlp.models.Backbone` and - a `keras_nlp.models.Preprocessor` to create a model that can be used for - image classification. + """VGG16 image classifier task model. Args: backbone: `keras.Model` instance, the backbone architecture of the @@ -41,7 +37,7 @@ class VGG16ImageClassifier(ImageClassifier): used to load a pre-trained config and weights. """ - backbone_cls = VGG16Backbone + backbone_cls = VGGBackbone def __init__( self, @@ -52,18 +48,25 @@ def __init__( **kwargs, ): # === Layers === - if pooling == "avg": - pooling_layer = keras.layers.GlobalAveragePooling2D(name="avg_pool") - elif pooling == "max": - pooling_layer = keras.layers.GlobalMaxPooling2D(name="max_pool") + self.backbone = backbone + self.pooling = pooling + self.activation = activation + if self.pooling == "avg": + self.pooling_layer = keras.layers.GlobalAveragePooling2D( + name="avg_pool" + ) + elif self.pooling == "max": + self.pooling_layer = keras.layers.GlobalMaxPooling2D( + name="max_pool" + ) else: raise ValueError( f'`pooling` must be one of "avg", "max". Received: {pooling}.' ) # === Functional Model === - inputs = backbone.input - x = backbone(inputs) - x = pooling_layer(x) + inputs = self.backbone.input + x = self.backbone(inputs) + x = self.pooling_layer(x) outputs = keras.layers.Dense( num_classes, activation=activation, @@ -77,10 +80,7 @@ def __init__( **kwargs, ) # === Config === - self.backbone = backbone self.num_classes = num_classes - self.pooling = pooling - self.activation = activation def get_config(self): # Backbone serialized in `super` From 41a87338206b885865ecec2b378e5410ebf3d971 Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Wed, 7 Aug 2024 18:59:33 +0000 Subject: [PATCH 14/25] update doc string --- keras_nlp/src/models/vgg/vgg_backbone.py | 8 ++++---- keras_nlp/src/models/vgg/vgg_image_classifier.py | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/keras_nlp/src/models/vgg/vgg_backbone.py b/keras_nlp/src/models/vgg/vgg_backbone.py index e7b6563695..6cc78bf064 100644 --- a/keras_nlp/src/models/vgg/vgg_backbone.py +++ b/keras_nlp/src/models/vgg/vgg_backbone.py @@ -23,6 +23,10 @@ class VGGBackbone(Backbone): """ This class represents Keras Backbone of VGG16 model. + This class implements a VGG backbone as described in [Very Deep + Convolutional Networks for Large-Scale Image Recognition]( + https://arxiv.org/abs/1409.1556)(ICLR 2015). + Args: stackwise_num_repeats: list of ints, number of repeated convolutional blocks per dense block. For VGG16 this is [2, 2, 3, 3, 3] and for @@ -41,10 +45,6 @@ class VGGBackbone(Backbone): the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. - - Reference: - - [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556) - (ICLR 2015) """ # noqa: E501 def __init__( diff --git a/keras_nlp/src/models/vgg/vgg_image_classifier.py b/keras_nlp/src/models/vgg/vgg_image_classifier.py index 9cfd67335d..3b831cc1f1 100644 --- a/keras_nlp/src/models/vgg/vgg_image_classifier.py +++ b/keras_nlp/src/models/vgg/vgg_image_classifier.py @@ -21,9 +21,7 @@ class VGG16ImageClassifier(ImageClassifier): """VGG16 image classifier task model. Args: - backbone: `keras.Model` instance, the backbone architecture of the - classifier called on the inputs. Pooling will be called on the last - dimension of the backbone output. + backbone: A `keras_nlp.models.VGGBackbone` instance. num_classes: int, number of classes to predict. pooling: str, type of pooling layer. Must be one of "avg", "max". activation: Optional `str` or callable, defaults to "softmax". The @@ -47,6 +45,7 @@ def __init__( activation="softmax", **kwargs, ): + # === Layers === self.backbone = backbone self.pooling = pooling @@ -63,6 +62,7 @@ def __init__( raise ValueError( f'`pooling` must be one of "avg", "max". Received: {pooling}.' ) + # === Functional Model === inputs = self.backbone.input x = self.backbone(inputs) @@ -79,6 +79,7 @@ def __init__( outputs=outputs, **kwargs, ) + # === Config === self.num_classes = num_classes From 40ad2ed41d8b2c2c4102f53a458a5961711a2aae Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Wed, 7 Aug 2024 19:19:00 +0000 Subject: [PATCH 15/25] update docstring --- keras_nlp/src/models/vgg/vgg_backbone.py | 18 ++++++++ .../src/models/vgg/vgg_image_classifier.py | 43 ++++++++++++++++++- .../models/vgg/vgg_image_classifier_test.py | 13 ++++++ 3 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 keras_nlp/src/models/vgg/vgg_image_classifier_test.py diff --git a/keras_nlp/src/models/vgg/vgg_backbone.py b/keras_nlp/src/models/vgg/vgg_backbone.py index 6cc78bf064..2d8fb2a31b 100644 --- a/keras_nlp/src/models/vgg/vgg_backbone.py +++ b/keras_nlp/src/models/vgg/vgg_backbone.py @@ -45,6 +45,24 @@ class VGGBackbone(Backbone): the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. + + Examples: + ```python + input_data = np.ones((2, 224, 224, 3), dtype="float32") + + # Pretrained VGG backbone. + model = keras_nlp.models.VGGBackbone.from_preset("vgg16") + model(input_data) + + # Randomly initialized VGG backbone with a custom config. + model = keras_nlp.models.VGGBackbone( + stackwise_num_repeats = [2, 2, 3, 3, 3], + input_shape = (224, 224, 3), + include_rescaling = False, + pooling = "avg", + ) + model(input_data) + ``` """ # noqa: E501 def __init__( diff --git a/keras_nlp/src/models/vgg/vgg_image_classifier.py b/keras_nlp/src/models/vgg/vgg_image_classifier.py index 3b831cc1f1..7b5a2fdb5c 100644 --- a/keras_nlp/src/models/vgg/vgg_image_classifier.py +++ b/keras_nlp/src/models/vgg/vgg_image_classifier.py @@ -17,7 +17,7 @@ from keras_nlp.src.models.vgg import VGGBackbone -class VGG16ImageClassifier(ImageClassifier): +class VGGImageClassifier(ImageClassifier): """VGG16 image classifier task model. Args: @@ -30,9 +30,48 @@ class VGG16ImageClassifier(ImageClassifier): To fine-tune with `fit()`, pass a dataset containing tuples of `(x, y)` labels where `x` is a string and `y` is a integer from `[0, num_classes)`. - All `ImageClassifier` tasks include a `from_preset()` constructor which can be used to load a pre-trained config and weights. + + Examples: + Train from preset + ```python + # Load preset and train + images = np.ones((2, 224, 224, 3), dtype="float32") + labels = [0, 3] + classifier = keras_nlp.models.VGGImageClassifier.from_preset( + 'vgg_16_image_classifier') + classifier.fit(x=images, y=labels, batch_size=2) + + # Re-compile (e.g., with a new learning rate). + classifier.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + optimizer=keras.optimizers.Adam(5e-5), + jit_compile=True, + ) + + # Access backbone programmatically (e.g., to change `trainable`). + classifier.backbone.trainable = False + # Fit again. + classifier.fit(x=images, y=labels, batch_size=2) + ``` + Custom backbone + ```python + images = np.ones((2, 224, 224, 3), dtype="float32") + labels = [0, 3] + + backbone = keras_nlp.models.VGGBackbone( + stackwise_num_repeats = [2, 2, 3, 3, 3], + input_shape = (224, 224, 3), + include_rescaling = False, + pooling = "avg", + ) + classifier = keras_nlp.models.VGGImageClassifier( + backbone=backbone, + num_classes=4, + ) + classifier.fit(x=images, y=labels, batch_size=2) + ``` """ backbone_cls = VGGBackbone diff --git a/keras_nlp/src/models/vgg/vgg_image_classifier_test.py b/keras_nlp/src/models/vgg/vgg_image_classifier_test.py new file mode 100644 index 0000000000..ba0c2545e4 --- /dev/null +++ b/keras_nlp/src/models/vgg/vgg_image_classifier_test.py @@ -0,0 +1,13 @@ +# Copyright 2023 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. From b1a6dfd00c59c69b022130e85fcb8a305d8e1888 Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Wed, 7 Aug 2024 19:30:31 +0000 Subject: [PATCH 16/25] add classifier test --- .../models/vgg/vgg_image_classifier_test.py | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/keras_nlp/src/models/vgg/vgg_image_classifier_test.py b/keras_nlp/src/models/vgg/vgg_image_classifier_test.py index ba0c2545e4..2967f85c31 100644 --- a/keras_nlp/src/models/vgg/vgg_image_classifier_test.py +++ b/keras_nlp/src/models/vgg/vgg_image_classifier_test.py @@ -11,3 +11,47 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np +import pytest + +from keras_nlp.src.models.vgg.vgg_backbone import VGGBackbone +from keras_nlp.src.models.vgg.vgg_image_classifier import VGGImageClassifier +from keras_nlp.src.tests.test_case import TestCase + + +class VGGImageClassifierTest(TestCase): + def setUp(self): + # Setup model. + images = np.ones((2, 224, 224, 3), dtype="float32") + labels = [0, 3] + self.backbone = VGGBackbone( + stackwise_num_repeats=[2, 2, 3, 3, 3], + input_shape=(224, 224, 3), + include_rescaling=False, + pooling="avg", + ) + self.init_kwargs = { + "backbone": self.backbone, + "num_classes": 4, + } + self.train_data = ( + images, + labels, + ) + + def test_classifier_basics(self): + pytest.skip(reason="enable after preprocessor flow is figured out") + self.run_task_test( + cls=VGGImageClassifier, + init_kwargs=self.init_kwargs, + train_data=self.train_data, + expected_output_shape=(2, 2), + ) + + @pytest.mark.large + def test_saved_model(self): + self.run_model_saving_test( + cls=VGGImageClassifier, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) From 443af980a1848642db2f9271a8db7de0e9f9d53d Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Wed, 7 Aug 2024 19:51:08 +0000 Subject: [PATCH 17/25] update tests --- keras_nlp/src/models/vgg/vgg_backbone.py | 3 +-- keras_nlp/src/models/vgg/vgg_image_classifier_test.py | 10 +++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/keras_nlp/src/models/vgg/vgg_backbone.py b/keras_nlp/src/models/vgg/vgg_backbone.py index 2d8fb2a31b..9785bba585 100644 --- a/keras_nlp/src/models/vgg/vgg_backbone.py +++ b/keras_nlp/src/models/vgg/vgg_backbone.py @@ -21,7 +21,7 @@ @keras_nlp_export("keras_nlp.models.VGGBackbone") class VGGBackbone(Backbone): """ - This class represents Keras Backbone of VGG16 model. + This class represents Keras Backbone of VGG model. This class implements a VGG backbone as described in [Very Deep Convolutional Networks for Large-Scale Image Recognition]( @@ -103,7 +103,6 @@ def __init__( max_pool=True, name=f"block{len(stackwise_num_repeats)}", ) - if pooling == "avg": x = layers.GlobalAveragePooling2D()(x) elif pooling == "max": diff --git a/keras_nlp/src/models/vgg/vgg_image_classifier_test.py b/keras_nlp/src/models/vgg/vgg_image_classifier_test.py index 2967f85c31..41fc58fbe5 100644 --- a/keras_nlp/src/models/vgg/vgg_image_classifier_test.py +++ b/keras_nlp/src/models/vgg/vgg_image_classifier_test.py @@ -22,8 +22,8 @@ class VGGImageClassifierTest(TestCase): def setUp(self): # Setup model. - images = np.ones((2, 224, 224, 3), dtype="float32") - labels = [0, 3] + self.images = np.ones((2, 224, 224, 3), dtype="float32") + self.labels = [0, 3] self.backbone = VGGBackbone( stackwise_num_repeats=[2, 2, 3, 3, 3], input_shape=(224, 224, 3), @@ -35,8 +35,8 @@ def setUp(self): "num_classes": 4, } self.train_data = ( - images, - labels, + self.images, + self.labels, ) def test_classifier_basics(self): @@ -53,5 +53,5 @@ def test_saved_model(self): self.run_model_saving_test( cls=VGGImageClassifier, init_kwargs=self.init_kwargs, - input_data=self.input_data, + input_data=self.images, ) From eb818d177a70ef321251c7a4bd82b9db71280c79 Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Wed, 7 Aug 2024 19:55:01 +0000 Subject: [PATCH 18/25] update docstring --- keras_nlp/src/models/vgg/vgg_backbone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_nlp/src/models/vgg/vgg_backbone.py b/keras_nlp/src/models/vgg/vgg_backbone.py index 9785bba585..5a1c37baf4 100644 --- a/keras_nlp/src/models/vgg/vgg_backbone.py +++ b/keras_nlp/src/models/vgg/vgg_backbone.py @@ -29,7 +29,7 @@ class VGGBackbone(Backbone): Args: stackwise_num_repeats: list of ints, number of repeated convolutional - blocks per dense block. For VGG16 this is [2, 2, 3, 3, 3] and for + blocks per VGG block. For VGG16 this is [2, 2, 3, 3, 3] and for VGG19 this is [2, 2, 4, 4, 4]. include_rescaling: bool, whether to rescale the inputs. If set to True, inputs will be passed through a `Rescaling(1/255.0)` layer. From f8c92c28fa1d980b167b559e5a416284c1bf08db Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Wed, 7 Aug 2024 22:43:49 +0000 Subject: [PATCH 19/25] address review comments --- keras_nlp/src/models/vgg/vgg_backbone.py | 27 ++++++++----------- keras_nlp/src/models/vgg/vgg_backbone_test.py | 9 ++++--- .../src/models/vgg/vgg_image_classifier.py | 17 ++++++------ .../models/vgg/vgg_image_classifier_test.py | 7 ++--- 4 files changed, 29 insertions(+), 31 deletions(-) diff --git a/keras_nlp/src/models/vgg/vgg_backbone.py b/keras_nlp/src/models/vgg/vgg_backbone.py index 5a1c37baf4..0724b996c6 100644 --- a/keras_nlp/src/models/vgg/vgg_backbone.py +++ b/keras_nlp/src/models/vgg/vgg_backbone.py @@ -31,6 +31,9 @@ class VGGBackbone(Backbone): stackwise_num_repeats: list of ints, number of repeated convolutional blocks per VGG block. For VGG16 this is [2, 2, 3, 3, 3] and for VGG19 this is [2, 2, 4, 4, 4]. + stackwise_filters: list of ints, filter size for convolutional + blocks per VGG block. For both VGG16 and VGG19 this is [ + 64, 128, 256, 512, 512]. include_rescaling: bool, whether to rescale the inputs. If set to True, inputs will be passed through a `Rescaling(1/255.0)` layer. input_shape: tuple, optional shape tuple, defaults to (224, 224, 3). @@ -68,41 +71,30 @@ class VGGBackbone(Backbone): def __init__( self, stackwise_num_repeats, + stackwise_num_filters, include_rescaling, - input_shape=(224, 224, 3), + input_image_shape=(224, 224, 3), pooling=None, **kwargs, ): # === Functional Model === - img_input = keras.layers.Input(shape=input_shape) + img_input = keras.layers.Input(shape=input_image_shape) x = img_input if include_rescaling: x = layers.Rescaling(scale=1 / 255.0)(x) - filters_size = 64 for stack_index in range(len(stackwise_num_repeats) - 1): x = apply_vgg_block( x=x, num_layers=stackwise_num_repeats[stack_index], - filters=filters_size, + filters=stackwise_num_filters[stack_index], kernel_size=(3, 3), activation="relu", padding="same", max_pool=True, name=f"block{stack_index + 1}", ) - filters_size = filters_size * 2 - x = apply_vgg_block( - x=x, - num_layers=stackwise_num_repeats[-1], - filters=512, - kernel_size=(3, 3), - activation="relu", - padding="same", - max_pool=True, - name=f"block{len(stackwise_num_repeats)}", - ) if pooling == "avg": x = layers.GlobalAveragePooling2D()(x) elif pooling == "max": @@ -114,13 +106,16 @@ def __init__( self.include_rescaling = include_rescaling self.pooling = pooling self.stackwise_num_repeats = stackwise_num_repeats + self.stackwise_num_filters = stackwise_num_filters + self.input_image_shape = input_image_shape def get_config(self): return { "stackwise_num_repeats": self.stackwise_num_repeats, + "stackwise_num_filters": self.stackwise_num_filters, "include_rescaling": self.include_rescaling, - "input_shape": self.input_shape[1:], "trainable": self.trainable, + "input_image_shape": self.input_image_shape, } diff --git a/keras_nlp/src/models/vgg/vgg_backbone_test.py b/keras_nlp/src/models/vgg/vgg_backbone_test.py index 428d270442..badd890224 100644 --- a/keras_nlp/src/models/vgg/vgg_backbone_test.py +++ b/keras_nlp/src/models/vgg/vgg_backbone_test.py @@ -22,19 +22,20 @@ class VGGBackboneTest(TestCase): def setUp(self): self.init_kwargs = { - "stackwise_num_repeats": [2, 2, 3, 3, 3], - "input_shape": (224, 224, 3), + "stackwise_num_repeats": [2, 3, 3], + "stackwise_num_filters": [8, 64, 64], + "input_image_shape": (16, 16, 3), "include_rescaling": False, "pooling": "avg", } - self.input_data = np.ones((2, 224, 224, 3), dtype="float32") + self.input_data = np.ones((2, 16, 16, 3), dtype="float32") def test_backbone_basics(self): self.run_backbone_test( cls=VGGBackbone, init_kwargs=self.init_kwargs, input_data=self.input_data, - expected_output_shape=(2, 512), + expected_output_shape=(2, 64), run_mixed_precision_check=False, is_cv_backbone=True, ) diff --git a/keras_nlp/src/models/vgg/vgg_image_classifier.py b/keras_nlp/src/models/vgg/vgg_image_classifier.py index 7b5a2fdb5c..5dc9aee6c5 100644 --- a/keras_nlp/src/models/vgg/vgg_image_classifier.py +++ b/keras_nlp/src/models/vgg/vgg_image_classifier.py @@ -87,9 +87,7 @@ def __init__( # === Layers === self.backbone = backbone - self.pooling = pooling - self.activation = activation - if self.pooling == "avg": + if pooling == "avg": self.pooling_layer = keras.layers.GlobalAveragePooling2D( name="avg_pool" ) @@ -101,16 +99,17 @@ def __init__( raise ValueError( f'`pooling` must be one of "avg", "max". Received: {pooling}.' ) + self.dense = keras.layers.Dense( + num_classes, + activation=activation, + name="predictions", + ) # === Functional Model === inputs = self.backbone.input x = self.backbone(inputs) x = self.pooling_layer(x) - outputs = keras.layers.Dense( - num_classes, - activation=activation, - name="predictions", - )(x) + outputs = self.dense(x) # Instantiate using Functional API Model constructor super().__init__( @@ -121,6 +120,8 @@ def __init__( # === Config === self.num_classes = num_classes + self.pooling = pooling + self.activation = activation def get_config(self): # Backbone serialized in `super` diff --git a/keras_nlp/src/models/vgg/vgg_image_classifier_test.py b/keras_nlp/src/models/vgg/vgg_image_classifier_test.py index 41fc58fbe5..9646d32382 100644 --- a/keras_nlp/src/models/vgg/vgg_image_classifier_test.py +++ b/keras_nlp/src/models/vgg/vgg_image_classifier_test.py @@ -22,11 +22,12 @@ class VGGImageClassifierTest(TestCase): def setUp(self): # Setup model. - self.images = np.ones((2, 224, 224, 3), dtype="float32") + self.images = np.ones((2, 4, 4, 3), dtype="float32") self.labels = [0, 3] self.backbone = VGGBackbone( - stackwise_num_repeats=[2, 2, 3, 3, 3], - input_shape=(224, 224, 3), + stackwise_num_repeats=[2, 4, 4], + stackwise_num_filters=[2, 16, 16], + input_image_shape=(4, 4, 3), include_rescaling=False, pooling="avg", ) From 7dae882c4390567078006739287cdf82204ff8ef Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Wed, 7 Aug 2024 22:47:34 +0000 Subject: [PATCH 20/25] code reformat --- keras_nlp/api/models/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/keras_nlp/api/models/__init__.py b/keras_nlp/api/models/__init__.py index 4fb3b3cf00..ea26bc0e14 100644 --- a/keras_nlp/api/models/__init__.py +++ b/keras_nlp/api/models/__init__.py @@ -129,6 +129,7 @@ GPTNeoXPreprocessor, ) from keras_nlp.src.models.gpt_neo_x.gpt_neo_x_tokenizer import GPTNeoXTokenizer +from keras_nlp.src.models.image_classifier import ImageClassifier from keras_nlp.src.models.llama3.llama3_backbone import Llama3Backbone from keras_nlp.src.models.llama3.llama3_causal_lm import Llama3CausalLM from keras_nlp.src.models.llama3.llama3_causal_lm_preprocessor import ( @@ -194,6 +195,7 @@ from keras_nlp.src.models.t5.t5_backbone import T5Backbone from keras_nlp.src.models.t5.t5_tokenizer import T5Tokenizer from keras_nlp.src.models.task import Task +from keras_nlp.src.models.vgg.vgg_backbone import VGGBackbone from keras_nlp.src.models.whisper.whisper_audio_feature_extractor import ( WhisperAudioFeatureExtractor, ) From cbf5ed73b240c0c7dc3a97ae312895f7157cf707 Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Wed, 7 Aug 2024 23:02:45 +0000 Subject: [PATCH 21/25] update the configs --- keras_nlp/src/models/vgg/vgg_backbone.py | 5 +++-- keras_nlp/src/models/vgg/vgg_image_classifier_test.py | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/keras_nlp/src/models/vgg/vgg_backbone.py b/keras_nlp/src/models/vgg/vgg_backbone.py index 0724b996c6..4bac2a4693 100644 --- a/keras_nlp/src/models/vgg/vgg_backbone.py +++ b/keras_nlp/src/models/vgg/vgg_backbone.py @@ -103,11 +103,11 @@ def __init__( super().__init__(inputs=img_input, outputs=x, **kwargs) # === Config === - self.include_rescaling = include_rescaling - self.pooling = pooling self.stackwise_num_repeats = stackwise_num_repeats self.stackwise_num_filters = stackwise_num_filters + self.include_rescaling = include_rescaling self.input_image_shape = input_image_shape + self.pooling = pooling def get_config(self): return { @@ -116,6 +116,7 @@ def get_config(self): "include_rescaling": self.include_rescaling, "trainable": self.trainable, "input_image_shape": self.input_image_shape, + "pooling": self.pooling, } diff --git a/keras_nlp/src/models/vgg/vgg_image_classifier_test.py b/keras_nlp/src/models/vgg/vgg_image_classifier_test.py index 9646d32382..119d642b73 100644 --- a/keras_nlp/src/models/vgg/vgg_image_classifier_test.py +++ b/keras_nlp/src/models/vgg/vgg_image_classifier_test.py @@ -34,6 +34,8 @@ def setUp(self): self.init_kwargs = { "backbone": self.backbone, "num_classes": 4, + "pooling": "avg", + "activation": "softmax", } self.train_data = ( self.images, From 483e7bdc70726b914fe8b3130de42d93773f69da Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Wed, 7 Aug 2024 23:23:30 +0000 Subject: [PATCH 22/25] address review comments --- keras_nlp/src/models/vgg/vgg_backbone_test.py | 1 - keras_nlp/src/models/vgg/vgg_image_classifier.py | 9 +++++---- keras_nlp/src/models/vgg/vgg_image_classifier_test.py | 4 +++- keras_nlp/src/tests/test_case.py | 3 +-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/keras_nlp/src/models/vgg/vgg_backbone_test.py b/keras_nlp/src/models/vgg/vgg_backbone_test.py index badd890224..05ed33ba0f 100644 --- a/keras_nlp/src/models/vgg/vgg_backbone_test.py +++ b/keras_nlp/src/models/vgg/vgg_backbone_test.py @@ -37,7 +37,6 @@ def test_backbone_basics(self): input_data=self.input_data, expected_output_shape=(2, 64), run_mixed_precision_check=False, - is_cv_backbone=True, ) @pytest.mark.large diff --git a/keras_nlp/src/models/vgg/vgg_image_classifier.py b/keras_nlp/src/models/vgg/vgg_image_classifier.py index 5dc9aee6c5..b7cdebfa62 100644 --- a/keras_nlp/src/models/vgg/vgg_image_classifier.py +++ b/keras_nlp/src/models/vgg/vgg_image_classifier.py @@ -13,10 +13,12 @@ # limitations under the License. import keras +from keras_nlp.src.api_export import keras_nlp_export from keras_nlp.src.models.image_classifier import ImageClassifier from keras_nlp.src.models.vgg import VGGBackbone +@keras_nlp_export("keras_nlp.models.VGGImageClassifier") class VGGImageClassifier(ImageClassifier): """VGG16 image classifier task model. @@ -62,6 +64,7 @@ class VGGImageClassifier(ImageClassifier): backbone = keras_nlp.models.VGGBackbone( stackwise_num_repeats = [2, 2, 3, 3, 3], + stackwise_num_filters = [64, 128, 256, 512, 512], input_shape = (224, 224, 3), include_rescaling = False, pooling = "avg", @@ -84,7 +87,6 @@ def __init__( activation="softmax", **kwargs, ): - # === Layers === self.backbone = backbone if pooling == "avg": @@ -99,7 +101,7 @@ def __init__( raise ValueError( f'`pooling` must be one of "avg", "max". Received: {pooling}.' ) - self.dense = keras.layers.Dense( + self.output_dense = keras.layers.Dense( num_classes, activation=activation, name="predictions", @@ -109,7 +111,7 @@ def __init__( inputs = self.backbone.input x = self.backbone(inputs) x = self.pooling_layer(x) - outputs = self.dense(x) + outputs = self.output_dense(x) # Instantiate using Functional API Model constructor super().__init__( @@ -128,7 +130,6 @@ def get_config(self): config = super().get_config() config.update( { - "backbone": keras.layers.serialize(self.backbone), "num_classes": self.num_classes, "pooling": self.pooling, "activation": self.activation, diff --git a/keras_nlp/src/models/vgg/vgg_image_classifier_test.py b/keras_nlp/src/models/vgg/vgg_image_classifier_test.py index 119d642b73..f0b70e40ba 100644 --- a/keras_nlp/src/models/vgg/vgg_image_classifier_test.py +++ b/keras_nlp/src/models/vgg/vgg_image_classifier_test.py @@ -43,7 +43,9 @@ def setUp(self): ) def test_classifier_basics(self): - pytest.skip(reason="enable after preprocessor flow is figured out") + pytest.skip( + reason="TODO: enable after preprocessor flow is figured out" + ) self.run_task_test( cls=VGGImageClassifier, init_kwargs=self.init_kwargs, diff --git a/keras_nlp/src/tests/test_case.py b/keras_nlp/src/tests/test_case.py index 4ec9efecd2..fc1ce77e1e 100644 --- a/keras_nlp/src/tests/test_case.py +++ b/keras_nlp/src/tests/test_case.py @@ -406,7 +406,6 @@ def run_backbone_test( variable_length_data=None, run_mixed_precision_check=True, run_quantization_check=True, - is_cv_backbone=False, ): """Run basic tests for a backbone, including compilation.""" backbone = cls(**init_kwargs) @@ -420,7 +419,7 @@ def run_backbone_test( self.assertEqual(output[key].shape, expected_output_shape[key]) else: self.assertEqual(output.shape, expected_output_shape) - if not is_cv_backbone: + if backbone.token_embedding is not None: # Check we can embed tokens eagerly. output = backbone.token_embedding(ops.zeros((2, 3), dtype="int32")) From d8a6745b20d21ccfce088cbdaeb1ab5c0d3add56 Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Thu, 8 Aug 2024 00:04:43 +0000 Subject: [PATCH 23/25] fix task saved model test --- keras_nlp/src/models/vgg/vgg_backbone.py | 8 ++++---- .../src/models/vgg/vgg_image_classifier.py | 18 ++---------------- .../models/vgg/vgg_image_classifier_test.py | 5 ++--- 3 files changed, 8 insertions(+), 23 deletions(-) diff --git a/keras_nlp/src/models/vgg/vgg_backbone.py b/keras_nlp/src/models/vgg/vgg_backbone.py index 4bac2a4693..497381c0fc 100644 --- a/keras_nlp/src/models/vgg/vgg_backbone.py +++ b/keras_nlp/src/models/vgg/vgg_backbone.py @@ -31,7 +31,7 @@ class VGGBackbone(Backbone): stackwise_num_repeats: list of ints, number of repeated convolutional blocks per VGG block. For VGG16 this is [2, 2, 3, 3, 3] and for VGG19 this is [2, 2, 4, 4, 4]. - stackwise_filters: list of ints, filter size for convolutional + stackwise_num_filters: list of ints, filter size for convolutional blocks per VGG block. For both VGG16 and VGG19 this is [ 64, 128, 256, 512, 512]. include_rescaling: bool, whether to rescale the inputs. If set to @@ -60,13 +60,14 @@ class VGGBackbone(Backbone): # Randomly initialized VGG backbone with a custom config. model = keras_nlp.models.VGGBackbone( stackwise_num_repeats = [2, 2, 3, 3, 3], + stackwise_num_filters = [64, 128, 256, 512, 512], input_shape = (224, 224, 3), include_rescaling = False, pooling = "avg", ) model(input_data) ``` - """ # noqa: E501 + """ def __init__( self, @@ -74,7 +75,7 @@ def __init__( stackwise_num_filters, include_rescaling, input_image_shape=(224, 224, 3), - pooling=None, + pooling="avg", **kwargs, ): @@ -114,7 +115,6 @@ def get_config(self): "stackwise_num_repeats": self.stackwise_num_repeats, "stackwise_num_filters": self.stackwise_num_filters, "include_rescaling": self.include_rescaling, - "trainable": self.trainable, "input_image_shape": self.input_image_shape, "pooling": self.pooling, } diff --git a/keras_nlp/src/models/vgg/vgg_image_classifier.py b/keras_nlp/src/models/vgg/vgg_image_classifier.py index b7cdebfa62..82954b7606 100644 --- a/keras_nlp/src/models/vgg/vgg_image_classifier.py +++ b/keras_nlp/src/models/vgg/vgg_image_classifier.py @@ -83,24 +83,13 @@ def __init__( self, backbone, num_classes, - pooling="avg", activation="softmax", + preprocessor=None, # adding this dummy arg for saved model test + # TODO: once preprocessor flow is figured out, this needs to be updated **kwargs, ): # === Layers === self.backbone = backbone - if pooling == "avg": - self.pooling_layer = keras.layers.GlobalAveragePooling2D( - name="avg_pool" - ) - elif self.pooling == "max": - self.pooling_layer = keras.layers.GlobalMaxPooling2D( - name="max_pool" - ) - else: - raise ValueError( - f'`pooling` must be one of "avg", "max". Received: {pooling}.' - ) self.output_dense = keras.layers.Dense( num_classes, activation=activation, @@ -110,7 +99,6 @@ def __init__( # === Functional Model === inputs = self.backbone.input x = self.backbone(inputs) - x = self.pooling_layer(x) outputs = self.output_dense(x) # Instantiate using Functional API Model constructor @@ -122,7 +110,6 @@ def __init__( # === Config === self.num_classes = num_classes - self.pooling = pooling self.activation = activation def get_config(self): @@ -131,7 +118,6 @@ def get_config(self): config.update( { "num_classes": self.num_classes, - "pooling": self.pooling, "activation": self.activation, } ) diff --git a/keras_nlp/src/models/vgg/vgg_image_classifier_test.py b/keras_nlp/src/models/vgg/vgg_image_classifier_test.py index f0b70e40ba..4a2573e496 100644 --- a/keras_nlp/src/models/vgg/vgg_image_classifier_test.py +++ b/keras_nlp/src/models/vgg/vgg_image_classifier_test.py @@ -29,12 +29,11 @@ def setUp(self): stackwise_num_filters=[2, 16, 16], input_image_shape=(4, 4, 3), include_rescaling=False, - pooling="avg", + pooling="max", ) self.init_kwargs = { "backbone": self.backbone, - "num_classes": 4, - "pooling": "avg", + "num_classes": 2, "activation": "softmax", } self.train_data = ( From 5f223e5e9ff5e9078f437b35a453eeb82c981957 Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Thu, 8 Aug 2024 00:08:31 +0000 Subject: [PATCH 24/25] update init --- keras_nlp/src/models/vgg/__init__.py | 2 -- keras_nlp/src/models/vgg/vgg_image_classifier.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/keras_nlp/src/models/vgg/__init__.py b/keras_nlp/src/models/vgg/__init__.py index b84103ad36..3364a6bd16 100644 --- a/keras_nlp/src/models/vgg/__init__.py +++ b/keras_nlp/src/models/vgg/__init__.py @@ -11,5 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -from keras_nlp.src.models.vgg.vgg_backbone import VGGBackbone diff --git a/keras_nlp/src/models/vgg/vgg_image_classifier.py b/keras_nlp/src/models/vgg/vgg_image_classifier.py index 82954b7606..a26fbfbc30 100644 --- a/keras_nlp/src/models/vgg/vgg_image_classifier.py +++ b/keras_nlp/src/models/vgg/vgg_image_classifier.py @@ -15,7 +15,7 @@ from keras_nlp.src.api_export import keras_nlp_export from keras_nlp.src.models.image_classifier import ImageClassifier -from keras_nlp.src.models.vgg import VGGBackbone +from keras_nlp.src.models.vgg.vgg_backbone import VGGBackbone @keras_nlp_export("keras_nlp.models.VGGImageClassifier") From 901f7aedc775fa37a37dbd6b90f1505d10e3f288 Mon Sep 17 00:00:00 2001 From: divyashreepathihalli Date: Thu, 8 Aug 2024 00:09:45 +0000 Subject: [PATCH 25/25] code reformatted --- keras_nlp/api/models/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/keras_nlp/api/models/__init__.py b/keras_nlp/api/models/__init__.py index ea26bc0e14..41f1a47284 100644 --- a/keras_nlp/api/models/__init__.py +++ b/keras_nlp/api/models/__init__.py @@ -196,6 +196,7 @@ from keras_nlp.src.models.t5.t5_tokenizer import T5Tokenizer from keras_nlp.src.models.task import Task from keras_nlp.src.models.vgg.vgg_backbone import VGGBackbone +from keras_nlp.src.models.vgg.vgg_image_classifier import VGGImageClassifier from keras_nlp.src.models.whisper.whisper_audio_feature_extractor import ( WhisperAudioFeatureExtractor, )