diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py
index cf431f7f69..0f67e844fd 100644
--- a/tests/test_custom_models.py
+++ b/tests/test_custom_models.py
@@ -1230,7 +1230,6 @@
         {"target_modules": ["lin0"], "init_weights": False},
         {"target_modules": ["lin1"], "init_weights": False},
     ),
-    # BD-LoRA different encounters issues as the adapter weights have different shapes then
 ]
 
 
@@ -1365,10 +1364,6 @@ def __init__(self, emb_size=100):
         super().__init__()
         self.emb = nn.Embedding(emb_size, 5)
         self.conv1d = Conv1D(1, 5)
-        # make sure that we have a good signal-to-noise ratio
-        # since apparently CUDA ReLU clips the gradient at a
-        # certain point.
-        self.conv1d.weight.data += 10
         self.relu = nn.ReLU()
         self.flat = nn.Flatten()
         self.lin0 = nn.Linear(10, 2)
@@ -2271,6 +2266,13 @@ def test_disable_adapters_with_merging(self, test_name, model_id, config_cls, co
         # same as test_disable_adapters, but with merging
         X = self.prepare_inputs_for_testing()
         model = self.transformers_class.from_pretrained(model_id).to(self.torch_device)
+
+        if isinstance(model, ModelEmbConv1D) and (self.torch_device != "cpu"):
+            # Shift the Conv1D weights on non-CPU devices to keep a good
+            # signal-to-noise ratio, since apparently CUDA ReLU clips the
+            # gradient at a certain point. The shift is skipped on CPU.
+            model.conv1d.weight.data += 10
+
         config = config_cls(
             base_model_name_or_path=model_id,
             **config_kwargs,