From 3858abf5e143bb7f91a7c7ca02a1de4c7d0c6b9c Mon Sep 17 00:00:00 2001
From: Sachin Prasad <sachinprasad@google.com>
Date: Wed, 14 Aug 2024 19:20:54 +0000
Subject: [PATCH 1/4] Add CSP DarkNet

---
 keras_nlp/src/models/csp_darknet/__init__.py  |  13 +
 .../csp_darknet/csp_darknet_backbone.py       | 394 ++++++++++++++++++
 .../csp_darknet/csp_darknet_backbone_test.py  |  47 +++
 .../csp_darknet_image_classifier.py           | 128 ++++++
 .../csp_darknet_image_classifier_test.py      |  61 +++
 5 files changed, 643 insertions(+)
 create mode 100644 keras_nlp/src/models/csp_darknet/__init__.py
 create mode 100644 keras_nlp/src/models/csp_darknet/csp_darknet_backbone.py
 create mode 100644 keras_nlp/src/models/csp_darknet/csp_darknet_backbone_test.py
 create mode 100644 keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier.py
 create mode 100644 keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier_test.py

diff --git a/keras_nlp/src/models/csp_darknet/__init__.py b/keras_nlp/src/models/csp_darknet/__init__.py
new file mode 100644
index 0000000000..3364a6bd16
--- /dev/null
+++ b/keras_nlp/src/models/csp_darknet/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2024 The KerasNLP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/keras_nlp/src/models/csp_darknet/csp_darknet_backbone.py b/keras_nlp/src/models/csp_darknet/csp_darknet_backbone.py
new file mode 100644
index 0000000000..a406fa33b3
--- /dev/null
+++ b/keras_nlp/src/models/csp_darknet/csp_darknet_backbone.py
@@ -0,0 +1,394 @@
+# Copyright 2024 The KerasNLP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import keras
+from keras import layers
+
+from keras_nlp.src.api_export import keras_nlp_export
+from keras_nlp.src.models.backbone import Backbone
+
+@keras_nlp_export("keras_nlp.models.CSPDarkNetBackbone")
+class CSPDarkNetBackbone(Backbone):
+    """
+    This class represents Keras Backbone of CSPDarkNet model.
+    
+    This class implements a CSPDarkNet backbone as described in
+    [CSPNet: A New Backbone that can Enhance Learning Capability of CNN](https://arxiv.org/abs/1911.11929).
+    
+    Args:
+        stackwise_num_filters:  A list of ints, filter size for each dark
+            level in the model.
+        stackwise_depth: A list of ints, the depth for each dark level in the
+            model.
+        include_rescaling: boolean. If `True`, rescale the input using
+            `Rescaling(1 / 255.0)` layer. If `False`, do nothing. Defaults to
+            `True`.
+        use_depthwise: bool, whether a `DarknetConvBlockDepthwise` should be
+            used over a `DarknetConvBlock`, defaults to False.
+        input_image_shape: tuple. The input shape without the batch size.
+            Defaults to `(None, None, 3)`.
+            
+    Examples:
+    ```python
+    input_data = np.ones(shape=(8, 224, 224, 3))
+
+    # Pretrained backbone
+    model = keras_nlp.models.CSPDarkNetBackbone.from_preset(
+        "csp_darknet_tiny_imagenet"
+    )
+    model(input_data)
+
+    # Randomly initialized backbone with a custom config
+    model = keras_nlp.models.CSPDarkNetBackbone(
+        stackwise_num_filters=[128, 256, 512, 1024],
+        stackwise_depth=[3, 9, 9, 3],
+        include_rescaling=False,    
+    )
+    model(input_data)
+    ```
+    """
+    
+    def __init__(
+        self,
+        stackwise_num_filters,
+        stackwise_depth,
+        include_rescaling,
+        use_depthwise = False,
+        input_image_shape = (224, 224, 3),
+        **kwargs,
+    ):
+        
+        # === Functional Model ===
+        apply_ConvBlock = (
+            apply_DarknetConvBlockDepthwise if use_depthwise else apply_DarknetConvBlock
+        )
+        base_channels = stackwise_num_filters[0] // 2
+        
+        image_input = layers.Input(shape=input_image_shape)
+        x = image_input
+        if include_rescaling:
+            x = layers.Rescaling(scale=1 / 255.0)(x)
+            
+        x = apply_focus(name="stem_focus")(x)
+        x = apply_DarknetConvBlock(
+            base_channels, kernel_size=3, strides=1, name="stem_conv"
+        )(x)
+        for index, (channels, depth) in enumerate(
+            zip(stackwise_num_filters, stackwise_depth)
+        ):
+            x = apply_ConvBlock(
+                channels,
+                kernel_size=3,
+                strides=2,
+                name=f"dark{index + 2}_conv",
+            )(x)
+
+            if index == len(stackwise_depth) - 1:
+                x = apply_SpatialPyramidPoolingBottleneck(
+                    channels,
+                    hidden_filters=channels // 2,
+                    name=f"dark{index + 2}_spp",
+                )(x)
+
+            x = apply_CrossStagePartial(
+                channels,
+                num_bottlenecks=depth,
+                use_depthwise=use_depthwise,
+                residual=(index != len(stackwise_depth) - 1),
+                name=f"dark{index + 2}_csp",
+            )(x)
+
+        super().__init__(inputs=image_input, outputs=x, **kwargs)
+
+        # === Config ===
+        self.stackwise_num_filters = stackwise_num_filters
+        self.stackwise_depth = stackwise_depth
+        self.include_rescaling = include_rescaling
+        self.use_depthwise = use_depthwise
+        self.input_image_shape = input_image_shape
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "stackwise_num_filters": self.stackwise_num_filters,
+                "stackwise_depth": self.stackwise_depth,
+                "include_rescaling": self.include_rescaling,
+                "use_depthwise": self.use_depthwise,
+                "input_image_shape": self.input_image_shape,
+            }
+        )
+        return config  
+        
+def apply_focus(name=None):
+    """A block used in CSPDarknet to focus information into channels of the
+    image.
+
+    If the shape of a batched input is (batch_size, width, height, channels),
+    this layer converts the image to shape (batch_size, width/2, height/2,
+    4*channels). See [the original discussion on YoloV5 Focus Layer](https://github.com/ultralytics/yolov5/discussions/3181).
+
+    Args:
+        name: the name for the concatenate layer used in the block.
+
+    Returns:
+        a function that takes an input Tensor and applies the Focus layer.
+    """
+
+    def apply(x):
+        return layers.Concatenate(name=name)(
+            [
+                x[..., ::2, ::2, :],
+                x[..., 1::2, ::2, :],
+                x[..., ::2, 1::2, :],
+                x[..., 1::2, 1::2, :],
+            ],
+        )
+
+    return apply
+
+def apply_DarknetConvBlock(
+    filters, kernel_size, strides, use_bias=False, activation="silu", name=None
+):
+    """
+    The basic conv block used in Darknet. Applies Conv2D followed by a
+    BatchNorm and the selected activation.
+
+    Args:
+        filters: Integer, the dimensionality of the output space (i.e. the
+            number of output filters in the convolution).
+        kernel_size: An integer or tuple/list of 2 integers, specifying the
+            height and width of the 2D convolution window. Can be a single
+            integer to specify the same value for both dimensions.
+        strides: An integer or tuple/list of 2 integers, specifying the strides
+            of the convolution along the height and width. Can be a single
+            integer to specify the same value for both dimensions.
+        use_bias: Boolean, whether the layer uses a bias vector.
+        activation: the activation applied after the BatchNorm layer. One of
+            "silu", "relu" or "leaky_relu", defaults to "silu".
+        name: the prefix for the layer names used in the block.
+    """
+    if name is None:
+        name = f"conv_block{keras.backend.get_uid('conv_block')}"
+    
+    def apply(inputs):
+        x = layers.Conv2D(
+            filters,
+            kernel_size,
+            strides,
+            padding="same",
+            use_bias=use_bias,
+            name=name + "_conv"
+        )(inputs)
+        
+        x = layers.BatchNormalization(name=name + "_bn")(x)
+        
+        if activation == "silu":
+            x = layers.Lambda(lambda x: keras.activations.silu(x))(x)
+        elif activation == "relu":
+            x = layers.ReLU()(x)
+        elif activation == "leaky_relu":
+            x = layers.LeakyReLU(0.1)(x)
+        
+        return x
+    
+    return apply
+
+def apply_DarknetConvBlockDepthwise(
+    filters, kernel_size, strides, activation="silu", name=None
+):
+    """
+    The depthwise conv block used in CSPDarknet.
+
+    Args:
+        filters: Integer, the dimensionality of the output space (i.e. the
+            number of output filters in the final convolution).
+        kernel_size: An integer or tuple/list of 2 integers, specifying the
+            height and width of the 2D convolution window. Can be a single
+            integer to specify the same value for both dimensions.
+        strides: An integer or tuple/list of 2 integers, specifying the strides
+            of the convolution along the height and width. Can be a single
+            integer to specify the same value for both dimensions.
+        activation: the activation applied after the final layer. One of "silu",
+            "relu" or "leaky_relu", defaults to "silu".
+        name: the prefix for the layer names used in the block.
+
+    """
+    if name is None:
+        name = f"conv_block{keras.backend.get_uid('conv_block')}"
+    
+    def apply(inputs):
+        x = layers.DepthwiseConv2D(
+            kernel_size, strides, padding="same", use_bias=False
+        )(inputs)
+        x = layers.BatchNormalization()(x)
+        
+        if activation == "silu":
+            x = layers.Lambda(lambda x: keras.activations.swish(x))(x)
+        elif activation == "relu":
+            x = layers.ReLU()(x)
+        elif activation == "leaky_relu":
+            x = layers.LeakyReLU(0.1)(x)
+        
+        x = apply_DarknetConvBlock(
+            filters, kernel_size=1, strides=1, activation=activation
+        )(x)
+        
+        return x
+    
+    return apply
+
+def apply_SpatialPyramidPoolingBottleneck(
+    filters,
+    hidden_filters=None,
+    kernel_sizes=(5, 9, 13),
+    activation="silu",
+    name=None,
+):
+    """
+    Spatial pyramid pooling layer used in YOLOv3-SPP
+
+    Args:
+        filters: Integer, the dimensionality of the output space (i.e. the
+            number of output filters used in the block).
+        hidden_filters: Integer, the dimensionality of the intermediate
+            bottleneck space (i.e. the number of output filters in the
+            bottleneck convolution). If None, it will be equal to filters.
+            Defaults to None.
+        kernel_sizes: A list or tuple representing all the pool sizes used for
+            the pooling layers, defaults to (5, 9, 13).
+        activation: Activation for the conv layers, defaults to "silu".
+        name: the prefix for the layer names used in the block.
+
+    Returns:
+        a function that takes an input Tensor and applies the
+        SpatialPyramidPoolingBottleneck block.
+    """
+    if name is None:
+        name = f"spp{keras.backend.get_uid('spp')}"
+
+    if hidden_filters is None:
+        hidden_filters = filters
+
+    def apply(x):
+        x = apply_DarknetConvBlock(
+            hidden_filters,
+            kernel_size=1,
+            strides=1,
+            activation=activation,
+            name=f"{name}_conv1",
+        )(x)
+        x = [x]
+
+        for kernel_size in kernel_sizes:
+            x.append(
+                layers.MaxPooling2D(
+                    kernel_size,
+                    strides=1,
+                    padding="same",
+                    name=f"{name}_maxpool_{kernel_size}",
+                )(x[0])
+            )
+
+        x = layers.Concatenate(name=f"{name}_concat")(x)
+        x = apply_DarknetConvBlock(
+            filters,
+            kernel_size=1,
+            strides=1,
+            activation=activation,
+            name=f"{name}_conv2",
+        )(x)
+
+        return x
+
+    return apply
+    
+def apply_CrossStagePartial(
+    filters,
+    num_bottlenecks,
+    residual=True,
+    use_depthwise=False,
+    activation="silu",
+    name=None
+):
+    """A block used in Cross Stage Partial Darknet.
+
+    Args:
+        filters: Integer, the dimensionality of the output space (i.e. the
+            number of output filters in the final convolution).
+        num_bottlenecks: an integer representing the number of blocks added in
+            the layer bottleneck.
+        residual: a boolean representing whether the value tensor before the
+            bottleneck should be added to the output of the bottleneck as a
+            residual, defaults to True.
+        use_depthwise: a boolean value used to decide whether a depthwise conv
+            block should be used over a regular darknet block, defaults to
+            False.
+        activation: the activation applied after the final layer. One of "silu",
+            "relu" or "leaky_relu", defaults to "silu".
+    """
+    
+    if name is None:
+        name = f"cross_stage_partial_{keras.backend.get_uid('cross_stage_partial')}"
+
+    def apply(inputs):
+        hidden_channels = filters // 2
+        ConvBlock = apply_DarknetConvBlockDepthwise if use_depthwise else apply_DarknetConvBlock
+
+        x1 = apply_DarknetConvBlock(
+            hidden_channels,
+            kernel_size=1,
+            strides=1,
+            activation=activation,
+            name=f"{name}_conv1"
+        )(inputs)
+
+        x2 = apply_DarknetConvBlock(
+            hidden_channels,
+            kernel_size=1,
+            strides=1,
+            activation=activation,
+            name=f"{name}_conv2"
+        )(inputs)
+
+        for i in range(num_bottlenecks):
+            residual_x = x1
+            x1 = apply_DarknetConvBlock(
+                hidden_channels,
+                kernel_size=1,
+                strides=1,
+                activation=activation,
+                name=f"{name}_bottleneck_{i}_conv1"
+            )(x1)
+            x1 = ConvBlock(
+                hidden_channels,
+                kernel_size=3,
+                strides=1,
+                activation=activation,
+                name=f"{name}_bottleneck_{i}_conv2"
+            )(x1)
+            if residual:
+                x1 = layers.Add(name=f"{name}_bottleneck_{i}_add")([residual_x, x1])
+
+        x = layers.Concatenate(name=f"{name}_concat")([x1, x2])
+        x = apply_DarknetConvBlock(
+            filters,
+            kernel_size=1,
+            strides=1,
+            activation=activation,
+            name=f"{name}_conv3"
+        )(x)
+
+        return x
+
+    return apply
diff --git a/keras_nlp/src/models/csp_darknet/csp_darknet_backbone_test.py b/keras_nlp/src/models/csp_darknet/csp_darknet_backbone_test.py
new file mode 100644
index 0000000000..38f64c060e
--- /dev/null
+++ b/keras_nlp/src/models/csp_darknet/csp_darknet_backbone_test.py
@@ -0,0 +1,47 @@
+# Copyright 2024 The KerasNLP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import pytest
+
+from keras_nlp.src.models.csp_darknet.csp_darknet_backbone import CSPDarkNetBackbone
+from keras_nlp.src.tests.test_case import TestCase
+
+class CSPDarkNetBackboneTest(TestCase):
+    def setUp(self):
+        self.init_kwargs = {
+            "stackwise_num_filters":[32, 64, 128, 256],
+            "stackwise_depth":[1, 3, 3, 1],
+            "include_rescaling":False,
+            "use_depthwise":False,
+            "input_image_shape":(224,224,3),
+        }
+        self.input_data = np.ones((2, 224, 224, 3), dtype="float32")
+
+    def test_backbone_basics(self):
+        self.run_backbone_test(
+            cls=CSPDarkNetBackbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+            expected_output_shape=(2, 7, 7, 256),
+            run_mixed_precision_check=False,
+        )
+
+    @pytest.mark.large
+    def test_saved_model(self):
+        self.run_model_saving_test(
+            cls=CSPDarkNetBackbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
\ No newline at end of file
diff --git a/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier.py b/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier.py
new file mode 100644
index 0000000000..60e8699473
--- /dev/null
+++ b/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier.py
@@ -0,0 +1,128 @@
+# Copyright 2024 The KerasNLP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import keras
+
+from keras_nlp.src.api_export import keras_nlp_export
+from keras_nlp.src.models.image_classifier import ImageClassifier
+from keras_nlp.src.models.csp_darknet.csp_darknet_backbone import CSPDarkNetBackbone
+
+
+@keras_nlp_export("keras_nlp.models.CSPDarkNetImageClassifier")
+class CSPDarkNetImageClassifier(ImageClassifier):
+    """CSPDarkNet image classifier task model.
+
+    Args:
+        backbone: A `keras_nlp.models.CSPDarkNetBackbone` instance.
+        num_classes: int. The number of classes to predict.
+        activation: `None`, str or callable. The activation function to use on
+            the `Dense` layer. Set `activation=None` to return the output
+            logits. Defaults to `"softmax"`.
+
+    To fine-tune with `fit()`, pass a dataset containing tuples of `(x, y)`
+    where `x` is a tensor and `y` is an integer from `[0, num_classes)`.
+    All `ImageClassifier` tasks include a `from_preset()` constructor which can
+    be used to load a pre-trained config and weights.
+
+    Examples:
+
+    Call `predict()` to run inference.
+    ```python
+    # Load preset and train
+    images = np.ones((2, 224, 224, 3), dtype="float32")
+    classifier = keras_nlp.models.CSPDarkNetImageClassifier.from_preset("csp_darknet_tiny_imagenet")
+    classifier.predict(images)
+    ```
+
+    Call `fit()` on a single batch.
+    ```python
+    # Load preset and train
+    images = np.ones((2, 224, 224, 3), dtype="float32")
+    labels = [0, 3]
+    classifier = keras_nlp.models.CSPDarkNetImageClassifier.from_preset("csp_darknet_tiny_imagenet")
+    classifier.fit(x=images, y=labels, batch_size=2)
+    ```
+
+    Call `fit()` with custom loss, optimizer and backbone.
+    ```python
+    classifier = keras_nlp.models.CSPDarkNetImageClassifier.from_preset("csp_darknet_tiny_imagenet")
+    classifier.compile(
+        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+        optimizer=keras.optimizers.Adam(5e-5),
+    )
+    classifier.backbone.trainable = False
+    classifier.fit(x=images, y=labels, batch_size=2)
+    ```
+
+    Custom backbone.
+    ```python
+    images = np.ones((2, 224, 224, 3), dtype="float32")
+    labels = [0, 3]
+    backbone = keras_nlp.models.CSPDarkNetBackbone(
+        stackwise_num_filters=[128, 256, 512, 1024],
+        stackwise_depth=[3, 9, 9, 3],
+        include_rescaling=False,
+        use_depthwise = False,
+        input_image_shape = (224, 224, 3),   
+    )
+    classifier = keras_nlp.models.CSPDarkNetImageClassifier(
+        backbone=backbone,
+        num_classes=4,
+    )
+    classifier.fit(x=images, y=labels, batch_size=2)
+    ```
+    """
+
+    backbone_cls = CSPDarkNetBackbone
+
+    def __init__(
+        self,
+        backbone,
+        num_classes,
+        activation="softmax",
+        preprocessor=None,  # adding this dummy arg for saved model test
+        # TODO: once preprocessor flow is figured out, this needs to be updated
+        **kwargs,
+    ):
+        # === Layers ===
+        self.backbone = backbone
+        self.output_dense = keras.layers.Dense(
+            num_classes,
+            activation=activation,
+            name="predictions",
+        )
+
+        # === Functional Model ===
+        inputs = self.backbone.input
+        x = self.backbone(inputs)
+        outputs = self.output_dense(x)
+        super().__init__(
+            inputs=inputs,
+            outputs=outputs,
+            **kwargs,
+        )
+
+        # === Config ===
+        self.num_classes = num_classes
+        self.activation = activation
+
+    def get_config(self):
+        # Backbone serialized in `super`
+        config = super().get_config()
+        config.update(
+            {
+                "num_classes": self.num_classes,
+                "activation": self.activation,
+            }
+        )
+        return config
\ No newline at end of file
diff --git a/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier_test.py b/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier_test.py
new file mode 100644
index 0000000000..8fa1b23313
--- /dev/null
+++ b/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier_test.py
@@ -0,0 +1,61 @@
+# Copyright 2023 The KerasNLP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import pytest
+
+from keras_nlp.src.models.csp_darknet.csp_darknet_backbone import CSPDarkNetBackbone
+from keras_nlp.src.models.csp_darknet.csp_darknet_image_classifier import CSPDarkNetImageClassifier
+from keras_nlp.src.tests.test_case import TestCase
+
+
+class CSPDarkNetImageClassifierTest(TestCase):
+    def setUp(self):
+        # Setup model.
+        self.images = np.ones((2, 16, 16, 3), dtype="float32")
+        self.labels = [0, 3]
+        self.backbone = CSPDarkNetBackbone(
+            stackwise_num_filters = [2, 16, 16],
+            stackwise_depth = [1, 3, 3, 1],
+            include_rescaling = False,
+            use_depthwise = False,
+            input_image_shape = (16, 16, 3),
+        )
+        self.init_kwargs = {
+            "backbone": self.backbone,
+            "num_classes": 2,
+            "activation": "softmax",
+        }
+        self.train_data = (
+            self.images,
+            self.labels,
+        )
+
+    def test_classifier_basics(self):
+        pytest.skip(
+            reason="TODO: enable after preprocessor flow is figured out"
+        )
+        self.run_task_test(
+            cls=CSPDarkNetImageClassifier,
+            init_kwargs=self.init_kwargs,
+            train_data=self.train_data,
+            expected_output_shape=(2, 2),
+        )
+
+    @pytest.mark.large
+    def test_saved_model(self):
+        self.run_model_saving_test(
+            cls=CSPDarkNetImageClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.images,
+        )
\ No newline at end of file

From df66f0d473eb446e21a553d239e02f0c138ac1f1 Mon Sep 17 00:00:00 2001
From: Sachin Prasad <sachinprasad@google.com>
Date: Wed, 14 Aug 2024 19:22:03 +0000
Subject: [PATCH 2/4] Add CSP DarkNet

---
 .../csp_darknet/csp_darknet_backbone.py       | 82 +++++++++++--------
 .../csp_darknet/csp_darknet_backbone_test.py  | 17 ++--
 .../csp_darknet_image_classifier.py           |  8 +-
 .../csp_darknet_image_classifier_test.py      | 20 +++--
 4 files changed, 75 insertions(+), 52 deletions(-)

diff --git a/keras_nlp/src/models/csp_darknet/csp_darknet_backbone.py b/keras_nlp/src/models/csp_darknet/csp_darknet_backbone.py
index a406fa33b3..60a1f84d33 100644
--- a/keras_nlp/src/models/csp_darknet/csp_darknet_backbone.py
+++ b/keras_nlp/src/models/csp_darknet/csp_darknet_backbone.py
@@ -17,14 +17,15 @@
 from keras_nlp.src.api_export import keras_nlp_export
 from keras_nlp.src.models.backbone import Backbone
 
+
 @keras_nlp_export("keras_nlp.models.CSPDarkNetBackbone")
 class CSPDarkNetBackbone(Backbone):
     """
     This class represents Keras Backbone of CSPDarkNet model.
-    
+
     This class implements a CSPDarkNet backbone as described in
     [CSPNet: A New Backbone that can Enhance Learning Capability of CNN](https://arxiv.org/abs/1911.11929).
-    
+
     Args:
         stackwise_num_filters:  A list of ints, filter size for each dark
             level in the model.
@@ -37,7 +38,7 @@ class CSPDarkNetBackbone(Backbone):
             used over a `DarknetConvBlock`, defaults to False.
         input_image_shape: tuple. The input shape without the batch size.
             Defaults to `(None, None, 3)`.
-            
+
     Examples:
     ```python
     input_data = np.ones(shape=(8, 224, 224, 3))
@@ -52,33 +53,35 @@ class CSPDarkNetBackbone(Backbone):
     model = keras_nlp.models.CSPDarkNetBackbone(
         stackwise_num_filters=[128, 256, 512, 1024],
         stackwise_depth=[3, 9, 9, 3],
-        include_rescaling=False,    
+        include_rescaling=False,
     )
     model(input_data)
     ```
     """
-    
+
     def __init__(
         self,
         stackwise_num_filters,
         stackwise_depth,
         include_rescaling,
-        use_depthwise = False,
-        input_image_shape = (224, 224, 3),
+        use_depthwise=False,
+        input_image_shape=(224, 224, 3),
         **kwargs,
     ):
-        
+
         # === Functional Model ===
         apply_ConvBlock = (
-            apply_DarknetConvBlockDepthwise if use_depthwise else apply_DarknetConvBlock
+            apply_DarknetConvBlockDepthwise
+            if use_depthwise
+            else apply_DarknetConvBlock
         )
         base_channels = stackwise_num_filters[0] // 2
-        
+
         image_input = layers.Input(shape=input_image_shape)
         x = image_input
         if include_rescaling:
             x = layers.Rescaling(scale=1 / 255.0)(x)
-            
+
         x = apply_focus(name="stem_focus")(x)
         x = apply_DarknetConvBlock(
             base_channels, kernel_size=3, strides=1, name="stem_conv"
@@ -128,8 +131,9 @@ def get_config(self):
                 "input_image_shape": self.input_image_shape,
             }
         )
-        return config  
-        
+        return config
+
+
 def apply_focus(name=None):
     """A block used in CSPDarknet to focus information into channels of the
     image.
@@ -157,6 +161,7 @@ def apply(x):
 
     return apply
 
+
 def apply_DarknetConvBlock(
     filters, kernel_size, strides, use_bias=False, activation="silu", name=None
 ):
@@ -180,7 +185,7 @@ def apply_DarknetConvBlock(
     """
     if name is None:
         name = f"conv_block{keras.backend.get_uid('conv_block')}"
-    
+
     def apply(inputs):
         x = layers.Conv2D(
             filters,
@@ -188,22 +193,23 @@ def apply(inputs):
             strides,
             padding="same",
             use_bias=use_bias,
-            name=name + "_conv"
+            name=name + "_conv",
         )(inputs)
-        
+
         x = layers.BatchNormalization(name=name + "_bn")(x)
-        
+
         if activation == "silu":
             x = layers.Lambda(lambda x: keras.activations.silu(x))(x)
         elif activation == "relu":
             x = layers.ReLU()(x)
         elif activation == "leaky_relu":
             x = layers.LeakyReLU(0.1)(x)
-        
+
         return x
-    
+
     return apply
 
+
 def apply_DarknetConvBlockDepthwise(
     filters, kernel_size, strides, activation="silu", name=None
 ):
@@ -226,28 +232,29 @@ def apply_DarknetConvBlockDepthwise(
     """
     if name is None:
         name = f"conv_block{keras.backend.get_uid('conv_block')}"
-    
+
     def apply(inputs):
         x = layers.DepthwiseConv2D(
             kernel_size, strides, padding="same", use_bias=False
         )(inputs)
         x = layers.BatchNormalization()(x)
-        
+
         if activation == "silu":
             x = layers.Lambda(lambda x: keras.activations.swish(x))(x)
         elif activation == "relu":
             x = layers.ReLU()(x)
         elif activation == "leaky_relu":
             x = layers.LeakyReLU(0.1)(x)
-        
+
         x = apply_DarknetConvBlock(
             filters, kernel_size=1, strides=1, activation=activation
         )(x)
-        
+
         return x
-    
+
     return apply
 
+
 def apply_SpatialPyramidPoolingBottleneck(
     filters,
     hidden_filters=None,
@@ -312,14 +319,15 @@ def apply(x):
         return x
 
     return apply
-    
+
+
 def apply_CrossStagePartial(
     filters,
     num_bottlenecks,
     residual=True,
     use_depthwise=False,
     activation="silu",
-    name=None
+    name=None,
 ):
     """A block used in Cross Stage Partial Darknet.
 
@@ -337,20 +345,24 @@ def apply_CrossStagePartial(
         activation: the activation applied after the final layer. One of "silu",
             "relu" or "leaky_relu", defaults to "silu".
     """
-    
+
     if name is None:
         name = f"cross_stage_partial_{keras.backend.get_uid('cross_stage_partial')}"
 
     def apply(inputs):
         hidden_channels = filters // 2
-        ConvBlock = apply_DarknetConvBlockDepthwise if use_depthwise else apply_DarknetConvBlock
+        ConvBlock = (
+            apply_DarknetConvBlockDepthwise
+            if use_depthwise
+            else apply_DarknetConvBlock
+        )
 
         x1 = apply_DarknetConvBlock(
             hidden_channels,
             kernel_size=1,
             strides=1,
             activation=activation,
-            name=f"{name}_conv1"
+            name=f"{name}_conv1",
         )(inputs)
 
         x2 = apply_DarknetConvBlock(
@@ -358,7 +370,7 @@ def apply(inputs):
             kernel_size=1,
             strides=1,
             activation=activation,
-            name=f"{name}_conv2"
+            name=f"{name}_conv2",
         )(inputs)
 
         for i in range(num_bottlenecks):
@@ -368,17 +380,19 @@ def apply(inputs):
                 kernel_size=1,
                 strides=1,
                 activation=activation,
-                name=f"{name}_bottleneck_{i}_conv1"
+                name=f"{name}_bottleneck_{i}_conv1",
             )(x1)
             x1 = ConvBlock(
                 hidden_channels,
                 kernel_size=3,
                 strides=1,
                 activation=activation,
-                name=f"{name}_bottleneck_{i}_conv2"
+                name=f"{name}_bottleneck_{i}_conv2",
             )(x1)
             if residual:
-                x1 = layers.Add(name=f"{name}_bottleneck_{i}_add")([residual_x, x1])
+                x1 = layers.Add(name=f"{name}_bottleneck_{i}_add")(
+                    [residual_x, x1]
+                )
 
         x = layers.Concatenate(name=f"{name}_concat")([x1, x2])
         x = apply_DarknetConvBlock(
@@ -386,7 +400,7 @@ def apply(inputs):
             kernel_size=1,
             strides=1,
             activation=activation,
-            name=f"{name}_conv3"
+            name=f"{name}_conv3",
         )(x)
 
         return x
diff --git a/keras_nlp/src/models/csp_darknet/csp_darknet_backbone_test.py b/keras_nlp/src/models/csp_darknet/csp_darknet_backbone_test.py
index 38f64c060e..19784d29cd 100644
--- a/keras_nlp/src/models/csp_darknet/csp_darknet_backbone_test.py
+++ b/keras_nlp/src/models/csp_darknet/csp_darknet_backbone_test.py
@@ -15,17 +15,20 @@
 import numpy as np
 import pytest
 
-from keras_nlp.src.models.csp_darknet.csp_darknet_backbone import CSPDarkNetBackbone
+from keras_nlp.src.models.csp_darknet.csp_darknet_backbone import (
+    CSPDarkNetBackbone,
+)
 from keras_nlp.src.tests.test_case import TestCase
 
+
 class CSPDarkNetBackboneTest(TestCase):
     def setUp(self):
         self.init_kwargs = {
-            "stackwise_num_filters":[32, 64, 128, 256],
-            "stackwise_depth":[1, 3, 3, 1],
-            "include_rescaling":False,
-            "use_depthwise":False,
-            "input_image_shape":(224,224,3),
+            "stackwise_num_filters": [32, 64, 128, 256],
+            "stackwise_depth": [1, 3, 3, 1],
+            "include_rescaling": False,
+            "use_depthwise": False,
+            "input_image_shape": (224, 224, 3),
         }
         self.input_data = np.ones((2, 224, 224, 3), dtype="float32")
 
@@ -44,4 +47,4 @@ def test_saved_model(self):
             cls=CSPDarkNetBackbone,
             init_kwargs=self.init_kwargs,
             input_data=self.input_data,
-        )
\ No newline at end of file
+        )
diff --git a/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier.py b/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier.py
index 60e8699473..5bf74ff202 100644
--- a/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier.py
+++ b/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier.py
@@ -14,8 +14,10 @@
 import keras
 
 from keras_nlp.src.api_export import keras_nlp_export
+from keras_nlp.src.models.csp_darknet.csp_darknet_backbone import (
+    CSPDarkNetBackbone,
+)
 from keras_nlp.src.models.image_classifier import ImageClassifier
-from keras_nlp.src.models.csp_darknet.csp_darknet_backbone import CSPDarkNetBackbone
 
 
 @keras_nlp_export("keras_nlp.models.CSPDarkNetImageClassifier")
@@ -73,7 +75,7 @@ class CSPDarkNetImageClassifier(ImageClassifier):
         stackwise_depth=[3, 9, 9, 3],
         include_rescaling=False,
         use_depthwise = False,
-        input_image_shape = (224, 224, 3),   
+        input_image_shape = (224, 224, 3),
     )
     classifier = keras_nlp.models.CSPDarkNetImageClassifier(
         backbone=backbone,
@@ -125,4 +127,4 @@ def get_config(self):
                 "activation": self.activation,
             }
         )
-        return config
\ No newline at end of file
+        return config
diff --git a/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier_test.py b/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier_test.py
index 8fa1b23313..ceb0cfe96b 100644
--- a/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier_test.py
+++ b/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier_test.py
@@ -14,8 +14,12 @@
 import numpy as np
 import pytest
 
-from keras_nlp.src.models.csp_darknet.csp_darknet_backbone import CSPDarkNetBackbone
-from keras_nlp.src.models.csp_darknet.csp_darknet_image_classifier import CSPDarkNetImageClassifier
+from keras_nlp.src.models.csp_darknet.csp_darknet_backbone import (
+    CSPDarkNetBackbone,
+)
+from keras_nlp.src.models.csp_darknet.csp_darknet_image_classifier import (
+    CSPDarkNetImageClassifier,
+)
 from keras_nlp.src.tests.test_case import TestCase
 
 
@@ -25,11 +29,11 @@ def setUp(self):
         self.images = np.ones((2, 16, 16, 3), dtype="float32")
         self.labels = [0, 3]
         self.backbone = CSPDarkNetBackbone(
-            stackwise_num_filters = [2, 16, 16],
-            stackwise_depth = [1, 3, 3, 1],
-            include_rescaling = False,
-            use_depthwise = False,
-            input_image_shape = (16, 16, 3),
+            stackwise_num_filters=[2, 16, 16],
+            stackwise_depth=[1, 3, 3, 1],
+            include_rescaling=False,
+            use_depthwise=False,
+            input_image_shape=(16, 16, 3),
         )
         self.init_kwargs = {
             "backbone": self.backbone,
@@ -58,4 +62,4 @@ def test_saved_model(self):
             cls=CSPDarkNetImageClassifier,
             init_kwargs=self.init_kwargs,
             input_data=self.images,
-        )
\ No newline at end of file
+        )

From e1bca1838ae67b0c592928944edd91ceb944e605 Mon Sep 17 00:00:00 2001
From: Sachin Prasad <sachinprasad@google.com>
Date: Wed, 14 Aug 2024 21:16:57 +0000
Subject: [PATCH 3/4] snake_case function names

---
 keras_nlp/api/models/__init__.py              |  6 +++
 .../csp_darknet/csp_darknet_backbone.py       | 43 +++++++++----------
 .../csp_darknet_image_classifier.py           |  9 ++--
 3 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/keras_nlp/api/models/__init__.py b/keras_nlp/api/models/__init__.py
index 783cfd5087..aca1e28538 100644
--- a/keras_nlp/api/models/__init__.py
+++ b/keras_nlp/api/models/__init__.py
@@ -50,6 +50,12 @@
 from keras_nlp.src.models.bloom.bloom_tokenizer import BloomTokenizer
 from keras_nlp.src.models.causal_lm import CausalLM
 from keras_nlp.src.models.classifier import Classifier
+from keras_nlp.src.models.csp_darknet.csp_darknet_backbone import (
+    CSPDarkNetBackbone,
+)
+from keras_nlp.src.models.csp_darknet.csp_darknet_image_classifier import (
+    CSPDarkNetImageClassifier,
+)
 from keras_nlp.src.models.deberta_v3.deberta_v3_backbone import (
     DebertaV3Backbone,
 )
diff --git a/keras_nlp/src/models/csp_darknet/csp_darknet_backbone.py b/keras_nlp/src/models/csp_darknet/csp_darknet_backbone.py
index 60a1f84d33..480d75c321 100644
--- a/keras_nlp/src/models/csp_darknet/csp_darknet_backbone.py
+++ b/keras_nlp/src/models/csp_darknet/csp_darknet_backbone.py
@@ -20,11 +20,11 @@
 
 @keras_nlp_export("keras_nlp.models.CSPDarkNetBackbone")
 class CSPDarkNetBackbone(Backbone):
-    """
-    This class represents Keras Backbone of CSPDarkNet model.
+    """This class represents Keras Backbone of CSPDarkNet model.
 
     This class implements a CSPDarkNet backbone as described in
-    [CSPNet: A New Backbone that can Enhance Learning Capability of CNN](https://arxiv.org/abs/1911.11929).
+    [CSPNet: A New Backbone that can Enhance Learning Capability of CNN](
+        https://arxiv.org/abs/1911.11929).
 
     Args:
         stackwise_num_filters:  A list of ints, filter size for each dark
@@ -68,12 +68,11 @@ def __init__(
         input_image_shape=(224, 224, 3),
         **kwargs,
     ):
-
         # === Functional Model ===
         apply_ConvBlock = (
-            apply_DarknetConvBlockDepthwise
+            apply_darknet_conv_block_depthwise
             if use_depthwise
-            else apply_DarknetConvBlock
+            else apply_darknet_conv_block
         )
         base_channels = stackwise_num_filters[0] // 2
 
@@ -83,7 +82,7 @@ def __init__(
             x = layers.Rescaling(scale=1 / 255.0)(x)
 
         x = apply_focus(name="stem_focus")(x)
-        x = apply_DarknetConvBlock(
+        x = apply_darknet_conv_block(
             base_channels, kernel_size=3, strides=1, name="stem_conv"
         )(x)
         for index, (channels, depth) in enumerate(
@@ -97,13 +96,13 @@ def __init__(
             )(x)
 
             if index == len(stackwise_depth) - 1:
-                x = apply_SpatialPyramidPoolingBottleneck(
+                x = apply_spatial_pyramid_pooling_bottleneck(
                     channels,
                     hidden_filters=channels // 2,
                     name=f"dark{index + 2}_spp",
                 )(x)
 
-            x = apply_CrossStagePartial(
+            x = apply_cross_stage_partial(
                 channels,
                 num_bottlenecks=depth,
                 use_depthwise=use_depthwise,
@@ -162,7 +161,7 @@ def apply(x):
     return apply
 
 
-def apply_DarknetConvBlock(
+def apply_darknet_conv_block(
     filters, kernel_size, strides, use_bias=False, activation="silu", name=None
 ):
     """
@@ -210,7 +209,7 @@ def apply(inputs):
     return apply
 
 
-def apply_DarknetConvBlockDepthwise(
+def apply_darknet_conv_block_depthwise(
     filters, kernel_size, strides, activation="silu", name=None
 ):
     """
@@ -246,7 +245,7 @@ def apply(inputs):
         elif activation == "leaky_relu":
             x = layers.LeakyReLU(0.1)(x)
 
-        x = apply_DarknetConvBlock(
+        x = apply_darknet_conv_block(
             filters, kernel_size=1, strides=1, activation=activation
         )(x)
 
@@ -255,7 +254,7 @@ def apply(inputs):
     return apply
 
 
-def apply_SpatialPyramidPoolingBottleneck(
+def apply_spatial_pyramid_pooling_bottleneck(
     filters,
     hidden_filters=None,
     kernel_sizes=(5, 9, 13),
@@ -288,7 +287,7 @@ def apply_SpatialPyramidPoolingBottleneck(
         hidden_filters = filters
 
     def apply(x):
-        x = apply_DarknetConvBlock(
+        x = apply_darknet_conv_block(
             hidden_filters,
             kernel_size=1,
             strides=1,
@@ -308,7 +307,7 @@ def apply(x):
             )
 
         x = layers.Concatenate(name=f"{name}_concat")(x)
-        x = apply_DarknetConvBlock(
+        x = apply_darknet_conv_block(
             filters,
             kernel_size=1,
             strides=1,
@@ -321,7 +320,7 @@ def apply(x):
     return apply
 
 
-def apply_CrossStagePartial(
+def apply_cross_stage_partial(
     filters,
     num_bottlenecks,
     residual=True,
@@ -352,12 +351,12 @@ def apply_CrossStagePartial(
     def apply(inputs):
         hidden_channels = filters // 2
         ConvBlock = (
-            apply_DarknetConvBlockDepthwise
+            apply_darknet_conv_block_depthwise
             if use_depthwise
-            else apply_DarknetConvBlock
+            else apply_darknet_conv_block
         )
 
-        x1 = apply_DarknetConvBlock(
+        x1 = apply_darknet_conv_block(
             hidden_channels,
             kernel_size=1,
             strides=1,
@@ -365,7 +364,7 @@ def apply(inputs):
             name=f"{name}_conv1",
         )(inputs)
 
-        x2 = apply_DarknetConvBlock(
+        x2 = apply_darknet_conv_block(
             hidden_channels,
             kernel_size=1,
             strides=1,
@@ -375,7 +374,7 @@ def apply(inputs):
 
         for i in range(num_bottlenecks):
             residual_x = x1
-            x1 = apply_DarknetConvBlock(
+            x1 = apply_darknet_conv_block(
                 hidden_channels,
                 kernel_size=1,
                 strides=1,
@@ -395,7 +394,7 @@ def apply(inputs):
                 )
 
         x = layers.Concatenate(name=f"{name}_concat")([x1, x2])
-        x = apply_DarknetConvBlock(
+        x = apply_darknet_conv_block(
             filters,
             kernel_size=1,
             strides=1,
diff --git a/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier.py b/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier.py
index 5bf74ff202..ea0c81a901 100644
--- a/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier.py
+++ b/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier.py
@@ -42,7 +42,8 @@ class CSPDarkNetImageClassifier(ImageClassifier):
     ```python
     # Load preset and train
     images = np.ones((2, 224, 224, 3), dtype="float32")
-    classifier = keras_nlp.models.CSPDarkNetImageClassifier.from_preset("csp_darknet_tiny_imagenet")
+    classifier = keras_nlp.models.CSPDarkNetImageClassifier.from_preset(
+        "csp_darknet_tiny_imagenet")
     classifier.predict(images)
     ```
 
@@ -51,13 +52,15 @@ class CSPDarkNetImageClassifier(ImageClassifier):
     # Load preset and train
     images = np.ones((2, 224, 224, 3), dtype="float32")
     labels = [0, 3]
-    classifier = keras_nlp.models.CSPDarkNetImageClassifier.from_preset("csp_darknet_tiny_imagenet")
+    classifier = keras_nlp.models.CSPDarkNetImageClassifier.from_preset(
+        "csp_darknet_tiny_imagenet")
     classifier.fit(x=images, y=labels, batch_size=2)
     ```
 
     Call `fit()` with custom loss, optimizer and backbone.
     ```python
-    classifier = keras_nlp.models.CSPDarkNetImageClassifier.from_preset("csp_darknet_tiny_imagenet")
+    classifier = keras_nlp.models.CSPDarkNetImageClassifier.from_preset(
+        "csp_darknet_tiny_imagenet")
     classifier.compile(
         loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
         optimizer=keras.optimizers.Adam(5e-5),

From 2502d4b5071f789faefac97ed7930cf7b5236d88 Mon Sep 17 00:00:00 2001
From: Sachin Prasad <sachinprasad@google.com>
Date: Wed, 14 Aug 2024 22:00:34 +0000
Subject: [PATCH 4/4] change use_depthwise to block_type

---
 .../csp_darknet/csp_darknet_backbone.py       | 27 ++++++++++---------
 .../csp_darknet/csp_darknet_backbone_test.py  |  2 +-
 .../csp_darknet_image_classifier.py           |  2 +-
 .../csp_darknet_image_classifier_test.py      |  2 +-
 4 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/keras_nlp/src/models/csp_darknet/csp_darknet_backbone.py b/keras_nlp/src/models/csp_darknet/csp_darknet_backbone.py
index 480d75c321..2745f61d01 100644
--- a/keras_nlp/src/models/csp_darknet/csp_darknet_backbone.py
+++ b/keras_nlp/src/models/csp_darknet/csp_darknet_backbone.py
@@ -34,8 +34,10 @@ class CSPDarkNetBackbone(Backbone):
         include_rescaling: boolean. If `True`, rescale the input using
             `Rescaling(1 / 255.0)` layer. If `False`, do nothing. Defaults to
             `True`.
-        use_depthwise: bool, whether a `DarknetConvBlockDepthwise` should be
-            used over a `DarknetConvBlock`, defaults to False.
+        block_type: str. One of `"basic_block"` or `"depthwise_block"`.
+            Use `"depthwise_block"` for a depthwise conv block and
+            `"basic_block"` for a basic conv block.
+            Defaults to `"basic_block"`.
         input_image_shape: tuple. The input shape without the batch size.
             Defaults to `(None, None, 3)`.
 
@@ -64,14 +66,14 @@ def __init__(
         stackwise_num_filters,
         stackwise_depth,
         include_rescaling,
-        use_depthwise=False,
+        block_type="basic_block",
         input_image_shape=(224, 224, 3),
         **kwargs,
     ):
         # === Functional Model ===
         apply_ConvBlock = (
             apply_darknet_conv_block_depthwise
-            if use_depthwise
+            if block_type == "depthwise_block"
             else apply_darknet_conv_block
         )
         base_channels = stackwise_num_filters[0] // 2
@@ -105,7 +107,7 @@ def __init__(
             x = apply_cross_stage_partial(
                 channels,
                 num_bottlenecks=depth,
-                use_depthwise=use_depthwise,
+                block_type=block_type,
                 residual=(index != len(stackwise_depth) - 1),
                 name=f"dark{index + 2}_csp",
             )(x)
@@ -116,7 +118,7 @@ def __init__(
         self.stackwise_num_filters = stackwise_num_filters
         self.stackwise_depth = stackwise_depth
         self.include_rescaling = include_rescaling
-        self.use_depthwise = use_depthwise
+        self.block_type = block_type
         self.input_image_shape = input_image_shape
 
     def get_config(self):
@@ -126,7 +128,7 @@ def get_config(self):
                 "stackwise_num_filters": self.stackwise_num_filters,
                 "stackwise_depth": self.stackwise_depth,
                 "include_rescaling": self.include_rescaling,
-                "use_depthwise": self.use_depthwise,
+                "block_type": self.block_type,
                 "input_image_shape": self.input_image_shape,
             }
         )
@@ -324,7 +326,7 @@ def apply_cross_stage_partial(
     filters,
     num_bottlenecks,
     residual=True,
-    use_depthwise=False,
+    block_type="basic_block",
     activation="silu",
     name=None,
 ):
@@ -338,9 +340,10 @@ def apply_cross_stage_partial(
         residual: a boolean representing whether the value tensor before the
             bottleneck should be added to the output of the bottleneck as a
             residual, defaults to True.
-        use_depthwise: a boolean value used to decide whether a depthwise conv
-            block should be used over a regular darknet block, defaults to
-            False.
+        block_type: str. One of `"basic_block"` or `"depthwise_block"`.
+            Use `"depthwise_block"` for a depthwise conv block and
+            `"basic_block"` for a basic conv block.
+            Defaults to `"basic_block"`.
         activation: the activation applied after the final layer. One of "silu",
             "relu" or "leaky_relu", defaults to "silu".
     """
@@ -352,7 +355,7 @@ def apply(inputs):
         hidden_channels = filters // 2
         ConvBlock = (
             apply_darknet_conv_block_depthwise
-            if use_depthwise
+            if block_type == "depthwise_block"
             else apply_darknet_conv_block
         )
 
diff --git a/keras_nlp/src/models/csp_darknet/csp_darknet_backbone_test.py b/keras_nlp/src/models/csp_darknet/csp_darknet_backbone_test.py
index 19784d29cd..aaad4fe515 100644
--- a/keras_nlp/src/models/csp_darknet/csp_darknet_backbone_test.py
+++ b/keras_nlp/src/models/csp_darknet/csp_darknet_backbone_test.py
@@ -27,7 +27,7 @@ def setUp(self):
             "stackwise_num_filters": [32, 64, 128, 256],
             "stackwise_depth": [1, 3, 3, 1],
             "include_rescaling": False,
-            "use_depthwise": False,
+            "block_type": "basic_block",
             "input_image_shape": (224, 224, 3),
         }
         self.input_data = np.ones((2, 224, 224, 3), dtype="float32")
diff --git a/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier.py b/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier.py
index ea0c81a901..6b013bdcc0 100644
--- a/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier.py
+++ b/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier.py
@@ -77,7 +77,7 @@ class CSPDarkNetImageClassifier(ImageClassifier):
         stackwise_num_filters=[128, 256, 512, 1024],
         stackwise_depth=[3, 9, 9, 3],
         include_rescaling=False,
-        use_depthwise = False,
+        block_type="basic_block",
         input_image_shape = (224, 224, 3),
     )
     classifier = keras_nlp.models.CSPDarkNetImageClassifier(
diff --git a/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier_test.py b/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier_test.py
index ceb0cfe96b..a07bb017a3 100644
--- a/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier_test.py
+++ b/keras_nlp/src/models/csp_darknet/csp_darknet_image_classifier_test.py
@@ -32,7 +32,7 @@ def setUp(self):
             stackwise_num_filters=[2, 16, 16],
             stackwise_depth=[1, 3, 3, 1],
             include_rescaling=False,
-            use_depthwise=False,
+            block_type="basic_block",
             input_image_shape=(16, 16, 3),
         )
         self.init_kwargs = {