From 40f0380bba1fceed1dbc470672b67b4b14d4b446 Mon Sep 17 00:00:00 2001
From: Matt Watson
Date: Fri, 8 Dec 2023 11:29:59 -0800
Subject: [PATCH] Remove lora dense for now

We are considering bringing this into core Keras, with a slightly
different design. Let's remove this version from KerasNLP so people
don't rely on it.
---
 keras_nlp/layers/__init__.py                 |   1 -
 keras_nlp/layers/modeling/lora_dense.py      | 234 --------------------
 keras_nlp/layers/modeling/lora_dense_test.py | 135 ------------
 3 files changed, 370 deletions(-)
 delete mode 100644 keras_nlp/layers/modeling/lora_dense.py
 delete mode 100644 keras_nlp/layers/modeling/lora_dense_test.py

diff --git a/keras_nlp/layers/__init__.py b/keras_nlp/layers/__init__.py
index 71c5eb0411..595c4eb661 100644
--- a/keras_nlp/layers/__init__.py
+++ b/keras_nlp/layers/__init__.py
@@ -16,7 +16,6 @@
     CachedMultiHeadAttention,
 )
 from keras_nlp.layers.modeling.f_net_encoder import FNetEncoder
-from keras_nlp.layers.modeling.lora_dense import LoraDense
 from keras_nlp.layers.modeling.masked_lm_head import MaskedLMHead
 from keras_nlp.layers.modeling.position_embedding import PositionEmbedding
 from keras_nlp.layers.modeling.reversible_embedding import ReversibleEmbedding
diff --git a/keras_nlp/layers/modeling/lora_dense.py b/keras_nlp/layers/modeling/lora_dense.py
deleted file mode 100644
index c439d86399..0000000000
--- a/keras_nlp/layers/modeling/lora_dense.py
+++ /dev/null
@@ -1,234 +0,0 @@
-# Copyright 2023 The KerasNLP Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import re
-
-from keras_nlp.api_export import keras_nlp_export
-from keras_nlp.backend import config
-from keras_nlp.backend import keras
-from keras_nlp.backend import ops
-
-
-def validate_einsum_equation(equation):
-    # For simplicity, we greatly restrict possible einsum equations. The final
-    # axis of the input must be the first axis of our kernel, and must not
-    # appear in our output.
-    left, right, output = re.split(",|->", equation)
-    valid = (
-        left[-1] == right[0]
-        and left[-1] not in output
-        and set(left[:-1]).isdisjoint(set(right[1:]))
-    )
-    if not valid:
-        raise ValueError(
-            "When passing an `EinsumDense` layer to a `LoraDense` layer, the "
-            "einsum `equation` must always have the form `*x,x*->*`, where "
-            "each `*` can be any sequence. Conceptually, the `equation` should "
-            "always represent a dense matmul on the last axis of the input. "
-            f"Received invalid equation `'{equation}'`."
-        )
-
-
-@keras_nlp_export("keras_nlp.layers.LoraDense")
-class LoraDense(keras.layers.Layer):
-    """A LoRA adapter layer for a dense input layer.
-
-    This layer implements a low-rank decomposition of a dense transformation, as
-    described in [LoRA: Low-Rank Adaptation Of Large Language Models](https://arxiv.org/pdf/2106.09685.pdf).
-    This layer can be used to replace a dense layer with a layer whose
-    parameters are mostly frozen.
-
-    By default, this layer takes in an `inner_dense` layer, freezes its
-    parameters, and builds a low-rank decomposed update to sum with the original
-    `inner_dense` output. These update parameters can be merged back into the
-    `inner_dense` kernel by calling `merge_weights()`.
-
-    Args:
-        inner_dense: A `keras.layers.Dense` or `keras.layers.EinsumDense`.
-            The inner dense layer to freeze and wrap with the `LoraDense`
-            layer. Note that for `EinsumDense` layers, the einsum equation must
-            represent a dense transformation on the last axis of the input,
-            though adding new axes to the output (e.g. a multi-head axis) is
-            allowed.
-        rank: int. The inner rank of the decomposed dense transformation. The
-            lower this number, the fewer trainable parameters the layer will
-            have.
-        alpha: float. A constant value used for scaling the lora update. The
-            lora update to the original dense transformation will be scaled by
-            `alpha / rank`.
-        lora_a_initializer: The initializer to use for the inner projection
-            from layer inputs to the inner `rank` intermediate outputs.
-        freeze_kernel: If true, the kernel of the inner dense layer will have
-            `trainable` set to `False`.
-        freeze_bias: If true, the bias of the inner dense layer will have
-            `trainable` set to `False`.
-        **kwargs: other keyword arguments.
-
-    Examples:
-
-    Wrap a `Dense` layer.
-    ```python
-    batch_size, feature_size = 4, 16
-    rank = 4
-    inputs = np.random.uniform(size=(batch_size, feature_size))
-    inner_dense = keras.layers.Dense(feature_size)
-    lora_dense = keras_nlp.layers.LoraDense(inner_dense, rank=4)
-    # Output starts out equal to the inner dense output.
-    assert np.allclose(inner_dense(inputs), lora_dense(inputs))
-
-    # Add some random updates to the lora parameters.
-    lora_dense.lora_a.assign(np.random.uniform(size=(feature_size, rank)))
-    lora_dense.lora_b.assign(np.random.uniform(size=(rank, feature_size)))
-    assert not np.allclose(inner_dense(inputs), lora_dense(inputs))
-
-    # Merge the lora update back into the inner dense layer.
-    lora_dense.merge_weights()
-    assert np.allclose(inner_dense(inputs), lora_dense(inputs))
-    ```
-
-    Wrap an `EinsumDense` layer with a multi-head projection.
-    ```python
-    batch_size, sequence_length, feature_size = 4, 10, 16
-    num_heads = 2
-    rank = 4
-    inputs = np.random.uniform(size=(batch_size, sequence_length, feature_size))
-    inner_dense = keras.layers.EinsumDense(
-        "abc,cde->abde",
-        output_shape=(sequence_length, num_heads, feature_size // num_heads),
-    )
-    lora_dense = keras_nlp.layers.LoraDense(inner_dense, rank=4)
-    # Output shape (4, 10, 2, 8)
-    lora_dense(inputs)
-    ```
-    """
-
-    def __init__(
-        self,
-        inner_dense,
-        rank=8,
-        alpha=8.0,
-        lora_a_initializer="variance_scaling",
-        freeze_kernel=True,
-        freeze_bias=True,
-        **kwargs,
-    ):
-        # Default to the same dtype as our inner layer.
-        if "dtype" not in kwargs:
-            kwargs["dtype"] = inner_dense.dtype_policy
-        super().__init__(**kwargs)
-
-        if not config.keras_3():
-            raise ValueError(
-                "Lora requires Keras 3, but Keras 2 is installed. Please "
-                "see https://github.com/keras-team/keras-nlp#installation"
-            )
-
-        if isinstance(inner_dense, keras.layers.Dense):
-            self.inner_dense = inner_dense
-        elif isinstance(inner_dense, keras.layers.EinsumDense):
-            self.inner_dense = inner_dense
-            validate_einsum_equation(inner_dense.equation)
-        else:
-            raise ValueError(
-                "Only `Dense` and `EinsumDense` inner layers are supported. "
-                f"Received: inner_dense={inner_dense}"
-            )
-
-        self.rank = rank
-        self.alpha = alpha
-        self.scale = alpha / rank
-        self.freeze_kernel = freeze_kernel
-        self.freeze_bias = freeze_bias
-        self.lora_a_initializer = keras.initializers.get(lora_a_initializer)
-
-        if inner_dense.built:
-            self.build_from_config(inner_dense.get_build_config())
-
-    def build(self, inputs_shape):
-        if not self.inner_dense.built:
-            self.inner_dense.build(inputs_shape)
-
-        if self.freeze_kernel and self.inner_dense.kernel is not None:
-            self.inner_dense.kernel.trainable = False
-
-        if self.freeze_bias and self.inner_dense.bias is not None:
-            self.inner_dense.bias.trainable = False
-
-        input_dim = inputs_shape[-1]
-        self.lora_a = self.add_weight(
-            name="lora_a",
-            shape=(input_dim, self.rank),
-            initializer=self.lora_a_initializer,
-        )
-        kernel_shape = self.inner_dense.kernel.shape
-        self.lora_b = self.add_weight(
-            name="lora_b",
-            shape=(self.rank,) + kernel_shape[1:],
-            initializer="zeros",
-        )
-        self.built = True
-
-    def merge_weights(self):
-        """Merge lora updates into the wrapped dense layer.
-
-        This function should only be called outside of any compiled context
-        (e.g. not during `fit()`, `predict()` or `evaluate()`). It will merge
-        the updates from the lora layers into the original dense layer, and
-        re-initialize the lora variables.
-        """
-        if not self.built:
-            return
-
-        # Compute matmul of lora_a and lora_b to get a kernel sized update.
-        update = ops.tensordot(self.lora_a, self.lora_b, axes=([-1], [0]))
-        update = update * ops.cast(self.scale, update.dtype)
-        # Add lora updates back into the inner dense kernel.
-        self.inner_dense.kernel.assign_add(update)
-        # Re-initialize lora weights.
-        self.lora_a.assign(
-            self.lora_a_initializer(self.lora_a.shape, self.lora_a.dtype)
-        )
-        self.lora_b.assign(ops.zeros_like(self.lora_b))
-
-    def call(self, inputs):
-        original_output = self.inner_dense(inputs)
-        # Compute the low-rank intermediate output.
-        update = ops.matmul(inputs, self.lora_a)
-        # Use the matching dense computation for a Dense or EinsumDense.
-        if isinstance(self.inner_dense, keras.layers.Dense):
-            update = ops.matmul(update, self.lora_b)
-        else:
-            update = ops.einsum(self.inner_dense.equation, update, self.lora_b)
-        # Scale and sum the lora update with the original frozen output.
-        return original_output + update * ops.cast(self.scale, update.dtype)
-
-    @classmethod
-    def from_config(cls, config):
-        config["inner_dense"] = keras.layers.deserialize(config["inner_dense"])
-        return super().from_config(config)
-
-    def get_config(self):
-        config = super().get_config()
-        config.update(
-            {
-                "inner_dense": keras.layers.serialize(self.inner_dense),
-                "rank": self.rank,
-                "alpha": self.alpha,
-                "lora_a_initializer": keras.initializers.serialize(
-                    self.lora_a_initializer
-                ),
-                "freeze_kernel": self.freeze_kernel,
-                "freeze_bias": self.freeze_bias,
-            }
-        )
-        return config
diff --git a/keras_nlp/layers/modeling/lora_dense_test.py b/keras_nlp/layers/modeling/lora_dense_test.py
deleted file mode 100644
index a15718b03a..0000000000
--- a/keras_nlp/layers/modeling/lora_dense_test.py
+++ /dev/null
@@ -1,135 +0,0 @@
-# Copyright 2023 The KerasNLP Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pytest
-
-from keras_nlp.backend import keras
-from keras_nlp.backend import random
-from keras_nlp.layers.modeling.lora_dense import LoraDense
-from keras_nlp.tests.test_case import TestCase
-
-
-@pytest.mark.keras_3_only
-class LoraDenseTest(TestCase):
-    def test_layer_behaviors(self):
-        self.run_layer_test(
-            cls=LoraDense,
-            init_kwargs={
-                "inner_dense": keras.layers.Dense(16),
-                "rank": 2,
-                "alpha": 16,
-                "lora_a_initializer": "HeNormal",
-            },
-            input_data=random.uniform(shape=(2, 4, 8)),
-            expected_output_shape=(2, 4, 16),
-            expected_num_trainable_weights=2,
-            expected_num_non_trainable_weights=2,
-            expected_num_non_trainable_variables=2,
-            run_mixed_precision_check=False,
-        )
-
-    def test_layer_behaviors_einsum(self):
-        self.run_layer_test(
-            cls=LoraDense,
-            init_kwargs={
-                "inner_dense": keras.layers.EinsumDense(
-                    "abc,cde->abde",
-                    output_shape=(None, 2, 16),
-                ),
-                "lora_a_initializer": "HeNormal",
-            },
-            input_data=random.uniform(shape=(2, 4, 8)),
-            expected_output_shape=(2, 4, 2, 16),
-            expected_num_trainable_weights=2,
-            expected_num_non_trainable_weights=1,
-            expected_num_non_trainable_variables=1,
-            run_mixed_precision_check=False,
-        )
-
-    def test_merge_dense(self):
-        inner_dense = keras.layers.Dense(16)
-        layer = LoraDense(inner_dense, rank=4)
-        layer.build((2, 16))
-        layer.lora_a.assign(random.uniform(shape=(16, 4)))
-        layer.lora_b.assign(random.uniform(shape=(4, 16)))
-
-        input_data = random.uniform((2, 16))
-        lora_output = layer(input_data)
-        dense_output = inner_dense(input_data)
-        self.assertNotAllClose(lora_output, dense_output)
-
-        layer.merge_weights()
-        merged_lora_output = layer(input_data)
-        dense_output = inner_dense(input_data)
-        self.assertAllClose(lora_output, merged_lora_output)
-        self.assertAllClose(lora_output, dense_output)
-
-    def test_merge_einsum(self):
-        inner_dense = keras.layers.EinsumDense(
-            "abc,cde->abde",
-            output_shape=(None, 2, 16),
-        )
-        layer = LoraDense(inner_dense, rank=4)
-        layer.build((2, 4, 16))
-        layer.lora_a.assign(random.uniform(shape=(16, 4)))
-        layer.lora_b.assign(random.uniform(shape=(4, 2, 16)))
-
-        input_data = random.uniform((2, 4, 16))
-        lora_output = layer(input_data)
-        dense_output = inner_dense(input_data)
-        self.assertNotAllClose(lora_output, dense_output)
-
-        layer.merge_weights()
-        merged_lora_output = layer(input_data)
-        dense_output = inner_dense(input_data)
-        self.assertAllClose(lora_output, merged_lora_output)
-        self.assertAllClose(lora_output, dense_output)
-
-    def test_freezing(self):
-        inner_dense = keras.layers.Dense(16)
-        layer = LoraDense(inner_dense, freeze_bias=False)
-        layer.build((2, 16))
-        self.assertFalse(inner_dense.kernel.trainable)
-        self.assertTrue(inner_dense.bias.trainable)
-
-        inner_dense = keras.layers.Dense(16)
-        layer = LoraDense(inner_dense)
-        layer.build((2, 16))
-        self.assertFalse(inner_dense.kernel.trainable)
-        self.assertFalse(inner_dense.bias.trainable)
-
-    def test_errors_if_not_dense(self):
-        with self.assertRaises(ValueError):
-            LoraDense(keras.layers.Concatenate())
-
-    def test_errors_invalid_einsum(self):
-        with self.assertRaises(ValueError):
-            # Kernel feature dim in the wrong place.
-            einsum = keras.layers.EinsumDense("abc,dec->abde", (2, 2, 16))
-            LoraDense(einsum, rank=4)
-
-        with self.assertRaises(ValueError):
-            # Input feature dim in the wrong place.
-            einsum = keras.layers.EinsumDense("acb,cde->abde", (2, 2, 16))
-            LoraDense(einsum, rank=4)
-
-        with self.assertRaises(ValueError):
-            # Input feature dim not summed over.
-            einsum = keras.layers.EinsumDense("abc,cde->abcde", (2, 2, 2, 16))
-            LoraDense(einsum, rank=4)
-
-        with self.assertRaises(ValueError):
-            # Double summations.
-            einsum = keras.layers.EinsumDense("abcd,cde->abe", (2, 2, 16))
-            LoraDense(einsum, rank=4)