diff --git a/Makefile b/Makefile
index e93f23296a..2404e5fa4b 100644
--- a/Makefile
+++ b/Makefile
@@ -44,6 +44,7 @@ fast_tests:
 fast_tests_diffusers:
 	python -m pip install .[tests]
 	python -m pip install -r examples/stable-diffusion/requirements.txt
+	python -m pip install peft==0.16.0
 	python -m pytest tests/test_diffusers.py
 
 # Run single-card non-regression tests on image classification models
@@ -86,7 +87,7 @@ slow_tests_custom_file_input: test_installs
 slow_tests_1x: test_installs
 	@status1=0; status2=0; status3=0; \
 	python -m pytest tests/test_examples.py -v -s -k "single_card" || status1=$$?; \
-	python -m pip install peft==0.10.0; \
+	python -m pip install peft==0.12.0; \
 	python -m pytest tests/test_peft_inference.py || status2=$$?; \
 	python -m pytest tests/test_pipeline.py || status3=$$?; \
 	exit $$((status1 + status2 + status3))
diff --git a/examples/stable-diffusion/text_to_image_generation.py b/examples/stable-diffusion/text_to_image_generation.py
index d432f04fb7..4308b90e24 100755
--- a/examples/stable-diffusion/text_to_image_generation.py
+++ b/examples/stable-diffusion/text_to_image_generation.py
@@ -576,19 +576,24 @@ def main():
                 pipeline.unet.set_default_attn_processor(pipeline.unet)
 
                 if args.unet_adapter_name_or_path is not None:
-                    from peft import PeftModel
+                    from peft import PeftModel, tuners
+
+                    tuners.boft.layer._FBD_CUDA = False
 
                     pipeline.unet = PeftModel.from_pretrained(pipeline.unet, args.unet_adapter_name_or_path)
-                    pipeline.unet = pipeline.unet.merge_and_unload()
+                    with torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16):
+                        pipeline.unet = pipeline.unet.merge_and_unload()
 
                 if args.text_encoder_adapter_name_or_path is not None:
-                    from peft import PeftModel
+                    from peft import PeftModel, tuners
+
+                    tuners.boft.layer._FBD_CUDA = False
 
                     pipeline.text_encoder = PeftModel.from_pretrained(
                         pipeline.text_encoder, args.text_encoder_adapter_name_or_path
                     )
-                    pipeline.text_encoder = pipeline.text_encoder.merge_and_unload()
-
+                    with torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16):
+                        pipeline.text_encoder = pipeline.text_encoder.merge_and_unload()
             else:
                 # SD LDM3D use-case
                 from optimum.habana.diffusers import GaudiStableDiffusionLDM3DPipeline as GaudiStableDiffusionPipeline
diff --git a/examples/stable-diffusion/training/README.md b/examples/stable-diffusion/training/README.md
index be30a5ae46..096593d210 100644
--- a/examples/stable-diffusion/training/README.md
+++ b/examples/stable-diffusion/training/README.md
@@ -244,6 +244,7 @@ PT_HPU_LAZY_MODE=1 python ../../gaudi_spawn.py --world_size 8 --use_mpi train_dr
     --mixed_precision=bf16 \
     --use_hpu_graphs_for_training \
     --use_hpu_graphs_for_inference \
+    --sdp_on_bf16 \
     --gaudi_config_name Habana/stable-diffusion \
     full
 ```
@@ -257,7 +258,7 @@ generate any additional images needed to meet the `num_class_images` requirement
 
 ### PEFT Model Fine-Tuning
 
-We provide DreamBooth examples demonstrating how to use LoRA, LoKR, LoHA, and OFT adapters to fine-tune the
+We provide DreamBooth examples demonstrating how to use LoRA, LoKR, LoHA, OFT and BOFT adapters to fine-tune the
 UNet or text encoder.
 
 To run the multi-card training, use:
@@ -283,6 +284,7 @@ PT_HPU_LAZY_MODE=1 python ../../gaudi_spawn.py --world_size 8 --use_mpi train_dr
     --mixed_precision=bf16 \
     --use_hpu_graphs_for_training \
     --use_hpu_graphs_for_inference \
+    --sdp_on_bf16 \
     --gaudi_config_name Habana/stable-diffusion \
     lora --unet_r 8 --unet_alpha 8
 ```
@@ -291,7 +293,7 @@ PT_HPU_LAZY_MODE=1 python ../../gaudi_spawn.py --world_size 8 --use_mpi train_dr
 > When using PEFT method we can use a much higher learning rate compared to vanilla dreambooth.
 > Here we use `1e-4` instead of the usual `5e-6`
 
-Similar command could be applied with `loha`, `lokr`, or `oft` adapters.
+A similar command can be used with the `loha`, `lokr`, `oft`, or `boft` adapters.
 
 You could check each adapter's specific arguments with `--help`, for example:
 
@@ -300,7 +302,8 @@ python train_dreambooth.py oft --help
 ```
 
 > [!WARNING]
-> Currently, the `oft` adapter is not supported in HPU graph mode, as it triggers `torch.inverse`,
+> Currently, the `oft` and `boft` adapters are not supported in HPU graph mode, as they trigger
+> `torch.inverse` or `torch.linalg.solve`,
 > causing a CPU fallback that is incompatible with HPU graph capturing.
 
 After training completes, you can use `text_to_image_generation.py` sample for inference as follows:
@@ -346,6 +349,7 @@ PT_HPU_LAZY_MODE=1 python train_dreambooth_lora_sdxl.py \
     --seed=0 \
     --use_hpu_graphs_for_inference \
     --use_hpu_graphs_for_training \
+    --sdp_on_bf16 \
     --gaudi_config_name Habana/stable-diffusion
 ```
 
diff --git a/examples/stable-diffusion/training/requirements.txt b/examples/stable-diffusion/training/requirements.txt
index 5795525415..7f7e4a2d0e 100644
--- a/examples/stable-diffusion/training/requirements.txt
+++ b/examples/stable-diffusion/training/requirements.txt
@@ -2,5 +2,6 @@ compel
 datasets
 imagesize
 opencv-python
-peft==0.10.0
+peft==0.16.0
 sentencepiece
+tensorboard==2.19.0
diff --git a/examples/stable-diffusion/training/train_dreambooth.py b/examples/stable-diffusion/training/train_dreambooth.py
index 8e374d5c94..bc7d85f5e6 100755
--- a/examples/stable-diffusion/training/train_dreambooth.py
+++ b/examples/stable-diffusion/training/train_dreambooth.py
@@ -53,7 +53,7 @@
 from diffusers.utils.torch_utils import is_compiled_module
 from habana_frameworks.torch.hpu import memory_stats
 from huggingface_hub import HfApi
-from peft import LoHaConfig, LoKrConfig, LoraConfig, OFTConfig, get_peft_model
+from peft import BOFTConfig, LoHaConfig, LoKrConfig, LoraConfig, OFTConfig, get_peft_model, tuners
 from PIL import Image
 from torch.utils.data import Dataset
 from torchvision import transforms
@@ -108,7 +108,9 @@ def import_model_class_from_model_name_or_path(pretrained_model_name_or_path: st
         raise ValueError(f"{model_class} is not supported.")
 
 
-def create_unet_adapter_config(args: argparse.Namespace) -> Union[LoraConfig, LoHaConfig, LoKrConfig, OFTConfig]:
+def create_unet_adapter_config(
+    args: argparse.Namespace,
+) -> Union[LoraConfig, LoHaConfig, LoKrConfig, OFTConfig, BOFTConfig]:
     if args.adapter == "full":
         raise ValueError("Cannot create unet adapter config for full parameter")
 
@@ -151,7 +153,22 @@ def create_unet_adapter_config(args: argparse.Namespace) -> Union[LoraConfig, Lo
             init_weights=True,
             coft=args.unet_use_coft,
             eps=args.unet_eps,
+            oft_block_size=0,
+        )
+    elif args.adapter == "boft":
+        config = BOFTConfig(
+            boft_block_size=args.unet_block_size,
+            boft_block_num=args.unet_block_num,
+            boft_n_butterfly_factor=args.unet_n_butterfly_factor,
+            target_modules=UNET_TARGET_MODULES,
+            boft_dropout=args.unet_dropout,
+            bias=args.unet_bias,
         )
+        from optimum.habana.peft.layer import GaudiBoftLinearForward
+
+        tuners.boft.layer.Linear.forward = GaudiBoftLinearForward
+        tuners.boft.layer._FBD_CUDA = False
+
     else:
         raise ValueError(f"Unknown adapter type {args.adapter}")
 
@@ -160,7 +177,7 @@ def create_unet_adapter_config(args: argparse.Namespace) -> Union[LoraConfig, Lo
 
 def create_text_encoder_adapter_config(
     args: argparse.Namespace,
-) -> Union[LoraConfig, LoHaConfig, LoKrConfig, OFTConfig]:
+) -> Union[LoraConfig, LoHaConfig, LoKrConfig, OFTConfig, BOFTConfig]:
     if args.adapter == "full":
         raise ValueError("Cannot create text_encoder adapter config for full parameter")
 
@@ -201,7 +218,21 @@ def create_text_encoder_adapter_config(
             init_weights=True,
             coft=args.te_use_coft,
             eps=args.te_eps,
+            oft_block_size=0,
+        )
+    elif args.adapter == "boft":
+        config = BOFTConfig(
+            boft_block_size=args.te_block_size,
+            boft_block_num=args.te_block_num,
+            boft_n_butterfly_factor=args.te_n_butterfly_factor,
+            target_modules=TEXT_ENCODER_TARGET_MODULES,
+            boft_dropout=args.te_dropout,
+            bias=args.te_bias,
         )
+        from optimum.habana.peft.layer import GaudiBoftLinearForward
+
+        tuners.boft.layer.Linear.forward = GaudiBoftLinearForward
+        tuners.boft.layer._FBD_CUDA = False
     else:
         raise ValueError(f"Unknown adapter type {args.adapter}")
 
@@ -479,6 +510,12 @@ def parse_args(input_args=None):
         action="store_true",
         help="Use HPU graphs for inference on HPU.",
     )
+    parser.add_argument(
+        "--sdp_on_bf16",
+        action="store_true",
+        default=False,
+        help="Allow pyTorch to use reduced precision in the SDPA math backend",
+    )
 
     # Adapter arguments
     subparsers = parser.add_subparsers(dest="adapter")
@@ -632,6 +669,44 @@ def parse_args(input_args=None):
         help="The control strength of COFT for text_encoder, only used if `train_text_encoder` is True",
     )
 
+    # boft adapter
+    boft = subparsers.add_parser("boft", help="Use Boft adapter")
+    boft.add_argument("--unet_block_size", type=int, default=8, help="Boft block_size for unet")
+    boft.add_argument("--unet_block_num", type=int, default=0, help="Boft block_num for unet")
+    boft.add_argument("--unet_n_butterfly_factor", type=int, default=1, help="Boft n_butterfly_factor for unet")
+    boft.add_argument("--unet_dropout", type=float, default=0.1, help="Boft dropout for unet")
+    boft.add_argument("--unet_bias", type=str, default="boft_only", help="Boft bias for unet")
+    boft.add_argument(
+        "--te_block_size",
+        type=int,
+        default=8,
+        help="Boft block_size for text_encoder,only used if `train_text_encoder` is True",
+    )
+    boft.add_argument(
+        "--te_block_num",
+        type=int,
+        default=0,
+        help="Boft block_num for text_encoder,only used if `train_text_encoder` is True",
+    )
+    boft.add_argument(
+        "--te_n_butterfly_factor",
+        type=int,
+        default=1,
+        help="Boft n_butterfly_factor for text_encoder,only used if `train_text_encoder` is True",
+    )
+    boft.add_argument(
+        "--te_dropout",
+        type=float,
+        default=0.1,
+        help="Boft dropout for text_encoder,only used if `train_text_encoder` is True",
+    )
+    boft.add_argument(
+        "--te_bias",
+        type=str,
+        default="boft_only",
+        help="Boft bias for text_encoder, only used if `train_text_encoder` is True",
+    )
+
     if input_args is not None:
         args = parser.parse_args(input_args)
     else:
@@ -875,6 +950,9 @@ def main(args):
     if args.seed is not None:
         set_seed(args.seed)
 
+    if args.sdp_on_bf16:
+        torch._C._set_math_sdp_allow_fp16_bf16_reduction(True)
+
     # Generate class images if prior preservation is enabled.
     if args.with_prior_preservation:
         class_images_dir = Path(args.class_data_dir)
diff --git a/examples/stable-diffusion/training/train_dreambooth_lora_sdxl.py b/examples/stable-diffusion/training/train_dreambooth_lora_sdxl.py
index f5530ea9c2..b154265c61 100755
--- a/examples/stable-diffusion/training/train_dreambooth_lora_sdxl.py
+++ b/examples/stable-diffusion/training/train_dreambooth_lora_sdxl.py
@@ -566,6 +566,12 @@ def parse_args(input_args=None):
         action="store_true",
         help="Use HPU graphs for inference on HPU.",
     )
+    parser.add_argument(
+        "--sdp_on_bf16",
+        action="store_true",
+        default=False,
+        help="Allow pyTorch to use reduced precision in the SDPA math backend",
+    )
 
     if input_args is not None:
         args = parser.parse_args(input_args)
@@ -851,6 +857,9 @@ def main(args):
     if args.seed is not None:
         set_seed(args.seed)
 
+    if args.sdp_on_bf16:
+        torch._C._set_math_sdp_allow_fp16_bf16_reduction(True)
+
     # Generate class images if prior preservation is enabled.
     if args.with_prior_preservation:
         class_images_dir = Path(args.class_data_dir)
diff --git a/optimum/habana/peft/__init__.py b/optimum/habana/peft/__init__.py
index ed33e84393..f3c691ce17 100644
--- a/optimum/habana/peft/__init__.py
+++ b/optimum/habana/peft/__init__.py
@@ -2,6 +2,7 @@
     GaudiAdaloraLayerSVDLinearForward,
     GaudiAdaptedAttention_getattr,
     GaudiAdaptedAttentionPreAttnForward,
+    GaudiBoftLinearForward,
     GaudiPolyLayerLinearForward,
 )
 from .peft_model import gaudi_generate, gaudi_prepare_inputs_for_generation
diff --git a/optimum/habana/peft/layer.py b/optimum/habana/peft/layer.py
index fb6074cdbc..4d167d45a0 100755
--- a/optimum/habana/peft/layer.py
+++ b/optimum/habana/peft/layer.py
@@ -217,3 +217,69 @@ def GaudiAdaptedAttention_getattr(self, name: str):
         # This is necessary as e.g. causal models have various methods that we
         # don't want to re-implement here.
         return getattr(self.model, name)
+
+
+def GaudiBoftLinearForward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
+    """
+    Gaudi forward for the PEFT BOFT `Linear` layer; the result is returned in the dtype of the input `x`.
+    Copied from Linear.forward: https://github.com/huggingface/peft/blob/v0.16.0/src/peft/tuners/boft/layer.py#L591
+    The only difference is the dtype cast logic, which is changed to avoid an error on HPU.
+    """
+    previous_dtype = x.dtype
+    # Adapters disabled (unmerging first if needed) or already merged: only the base layer is called.
+    if self.disable_adapters:
+        if self.merged:
+            self.unmerge()
+        result = self.base_layer(x, *args, **kwargs)
+    elif self.merged:
+        result = self.base_layer(x, *args, **kwargs)
+    else:
+        boft_rotation = torch.eye(self.in_features, device=x.device, dtype=previous_dtype)
+        boft_scale = torch.ones((int(self.out_features), 1), device=x.device, dtype=previous_dtype)
+        # Fold every active adapter that has BOFT weights into the running rotation and scale.
+        for active_adapter in self.active_adapters:
+            if active_adapter not in self.boft_R.keys():
+                continue
+            boft_R = self.boft_R[active_adapter]
+            boft_s = self.boft_s[active_adapter]
+            dropout = self.boft_dropout[active_adapter]
+
+            N, D, H, _ = boft_R.shape
+            boft_R = boft_R.view(N * D, H, H)
+            orth_rotate_butterfly = self.cayley_batch(boft_R)
+            orth_rotate_butterfly = orth_rotate_butterfly.view(N, D, H, H)
+            orth_rotate_butterfly = dropout(orth_rotate_butterfly)
+            orth_rotate_butterfly = orth_rotate_butterfly.squeeze(0)
+            block_diagonal_butterfly = torch.block_diag(*torch.unbind(orth_rotate_butterfly))
+            block_diagonal_butterfly = block_diagonal_butterfly.unsqueeze(0)
+
+            # The BOFT author's cayley_batch, dropout and FastBlockDiag ONLY return fp32 outputs.
+            boft_P = self.boft_P.to(x)
+            block_diagonal_butterfly = block_diagonal_butterfly.to(x)
+            butterfly_oft_mat_batch = torch.bmm(block_diagonal_butterfly, boft_P.permute(0, 2, 1))
+            butterfly_oft_mat_batch = torch.bmm(boft_P, butterfly_oft_mat_batch)
+            butterfly_oft_mat = butterfly_oft_mat_batch[0]
+            # Left-multiply the remaining batch entries onto the first one, in index order.
+            for i in range(1, butterfly_oft_mat_batch.shape[0]):
+                butterfly_oft_mat = butterfly_oft_mat_batch[i] @ butterfly_oft_mat
+            # Stack this adapter's rotation and scale on top of those accumulated so far.
+            boft_rotation = butterfly_oft_mat @ boft_rotation
+            boft_scale = boft_s * boft_scale
+
+        x = x.to(self.get_base_layer().weight.data.dtype)
+        # Apply the rotation to the transposed base weight, transpose back, then apply the scale.
+        orig_weight = self.get_base_layer().weight.data
+        orig_weight = torch.transpose(orig_weight, 0, 1)
+        boft_rotation = boft_rotation.to(previous_dtype)
+        orig_weight = orig_weight.to(previous_dtype)
+        rotated_weight = torch.mm(boft_rotation, orig_weight)
+        rotated_weight = torch.transpose(rotated_weight, 0, 1)
+
+        scaled_rotated_weight = rotated_weight * boft_scale
+
+        scaled_rotated_weight = scaled_rotated_weight.to(previous_dtype)
+        bias = self._cast_input_dtype(self.base_layer.bias, scaled_rotated_weight.dtype)
+        result = F.linear(input=x, weight=scaled_rotated_weight, bias=bias)
+
+    result = result.to(previous_dtype)
+    return result
diff --git a/tests/test_diffusers.py b/tests/test_diffusers.py
index 41802b28d5..cee19d61d9 100644
--- a/tests/test_diffusers.py
+++ b/tests/test_diffusers.py
@@ -274,6 +274,85 @@ class GaudiStableDiffusionPipelineTester(TestCase):
     Tests the StableDiffusionPipeline for Gaudi.
     """
 
+    def merge_peft_adapter(self, model, adapter):
+        from peft import BOFTConfig, LoHaConfig, LoKrConfig, LoraConfig, OFTConfig, get_peft_model
+        # Module names that receive the adapter; the list is picked by model type just below.
+        UNET_TARGET_MODULES = [
+            "to_q",
+            "to_k",
+            "to_v",
+            "proj",
+            "proj_in",
+            "proj_out",
+            "conv",
+            "conv1",
+            "conv2",
+            "conv_shortcut",
+            "to_out.0",
+            "time_emb_proj",
+            "ff.net.2",
+        ]
+        TEXT_ENCODER_TARGET_MODULES = ["fc1", "fc2", "q_proj", "k_proj", "v_proj", "out_proj"]
+        target_modules = (
+            UNET_TARGET_MODULES if isinstance(model, UNet2DConditionModel) else TEXT_ENCODER_TARGET_MODULES
+        )
+        # One config per adapter name. NOTE(review): an unknown name leaves `config` unbound for the call below.
+        if adapter == "lora":
+            config = LoraConfig(
+                r=2,
+                lora_alpha=2,
+                target_modules=target_modules,
+                lora_dropout=0.0,
+                bias="none",
+                init_lora_weights=True,
+            )
+        elif adapter == "loha":
+            config = LoHaConfig(
+                r=2,
+                alpha=2,
+                target_modules=target_modules,
+                rank_dropout=0.0,
+                module_dropout=0.0,
+                use_effective_conv2d=False,
+                init_weights=True,
+            )
+        elif adapter == "lokr":
+            config = LoKrConfig(
+                r=2,
+                alpha=2,
+                target_modules=target_modules,
+                rank_dropout=0.0,
+                module_dropout=0.0,
+                use_effective_conv2d=False,
+                decompose_both=False,
+                decompose_factor=-1,
+                init_weights=True,
+            )
+        elif adapter == "oft":
+            config = OFTConfig(
+                r=2,
+                target_modules=target_modules,
+                module_dropout=0.0,
+                init_weights=True,
+                coft=False,
+                oft_block_size=0,
+                eps=0.0,
+            )
+        elif adapter == "boft":
+            from peft import tuners
+
+            tuners.boft.layer._FBD_CUDA = False
+            config = BOFTConfig(
+                boft_block_size=8,
+                boft_block_num=0,
+                boft_n_butterfly_factor=1,
+                target_modules=target_modules,
+                boft_dropout=0.1,
+                bias="boft_only",
+            )
+        model = get_peft_model(model, config)
+        return model.merge_and_unload()
+
     @pytest.fixture(autouse=True)
     def _use_(self, baseline):
         """
@@ -615,6 +694,37 @@ def test_stable_diffusion_hpu_graphs(self):
         self.assertEqual(len(images), 10)
         self.assertEqual(images[-1].shape, (64, 64, 3))
 
+    @parameterized.expand(["lora", "loha", "lokr", "oft", "boft"])
+    @slow
+    def test_no_peft_regression_bf16(self, peft_adapter):
+        prompts = [
+            "An image of a squirrel in Picasso style",
+        ]
+        num_images_per_prompt = 1
+        batch_size = 1
+        model_name = "runwayml/stable-diffusion-v1-5"
+        scheduler = GaudiDDIMScheduler.from_pretrained(model_name, subfolder="scheduler")
+        pipeline = GaudiStableDiffusionPipeline.from_pretrained(
+            model_name,
+            scheduler=scheduler,
+            use_habana=True,
+            use_hpu_graphs=True,
+            gaudi_config=GaudiConfig.from_pretrained("Habana/stable-diffusion"),
+            torch_dtype=torch.bfloat16,
+        )
+        # Wrap the UNet and the text encoder with the adapter and merge it back, under bf16 autocast.
+        with torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=True):
+            pipeline.unet = self.merge_peft_adapter(pipeline.unet, peft_adapter)
+            pipeline.text_encoder = self.merge_peft_adapter(pipeline.text_encoder, peft_adapter)
+        # Only the number of returned images is asserted below, not their content.
+        set_seed(27)
+        outputs = pipeline(
+            prompt=prompts,
+            num_images_per_prompt=num_images_per_prompt,
+            batch_size=batch_size,
+        )
+        self.assertEqual(len(outputs.images), num_images_per_prompt * len(prompts))
+
     @slow
     @legacy
     def test_no_throughput_regression_bf16(self):
@@ -813,7 +923,7 @@ def test_sd_textual_inversion(self):
             / "training"
             / "textual_inversion.py"
         )
-
+        install_requirements(path_to_script.parent / "requirements.txt")
         with tempfile.TemporaryDirectory() as data_dir:
             snapshot_download(
                 "diffusers/cat_toy_example", local_dir=data_dir, repo_type="dataset", ignore_patterns=".gitattributes"
@@ -1207,6 +1317,7 @@ def test_sdxl_textual_inversion(self):
             / "training"
             / "textual_inversion_sdxl.py"
         )
+        install_requirements(path_to_script.parent / "requirements.txt")
         with tempfile.TemporaryDirectory() as data_dir:
             snapshot_download(
                 "diffusers/cat_toy_example", local_dir=data_dir, repo_type="dataset", ignore_patterns=".gitattributes"
@@ -2567,6 +2678,7 @@ def test_train_text_to_image_script(self):
             / "training"
             / "train_text_to_image_sdxl.py"
         )
+        install_requirements(path_to_script.parent / "requirements.txt")
 
         cmd_line = f"""ls {path_to_script}""".split()
 
@@ -2587,7 +2699,7 @@ def test_train_text_to_image_sdxl(self):
                 / "training"
                 / "train_text_to_image_sdxl.py"
             )
-
+            install_requirements(path_to_script.parent / "requirements.txt")
             cmd_line = f"""
                  python3
                  {path_to_script}
@@ -2651,7 +2763,7 @@ def test_script_train_controlnet(self):
             / "training"
             / "train_controlnet.py"
         )
-
+        install_requirements(path_to_script.parent / "requirements.txt")
         cmd_line = f"""ls {path_to_script}""".split()
 
         # check find existence
@@ -2673,7 +2785,7 @@ def test_train_controlnet(self):
                 / "training"
                 / "train_controlnet.py"
             )
-
+            install_requirements(path_to_script.parent / "requirements.txt")
             download_files(
                 [
                     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/conditioning_image_1.png",
@@ -2804,6 +2916,9 @@ def _test_dreambooth(self, extra_config, train_text_encoder=False):
             if train_text_encoder:
                 test_args.append("--train_text_encoder")
             test_args.append(extra_config)
+            if "boft" in extra_config:
+                extra_args = "--unet_block_size 1 --te_block_size 1"
+                test_args.extend(extra_args.split())
             p = subprocess.Popen(test_args)
             return_code = p.wait()
 
@@ -2860,6 +2975,14 @@ def test_dreambooth_oft(self):
     def test_dreambooth_oft_with_text_encoder(self):
         self._test_dreambooth("oft", train_text_encoder=True)
 
+    @slow
+    def test_dreambooth_boft(self):
+        self._test_dreambooth("boft")
+
+    @slow
+    def test_dreambooth_boft_with_text_encoder(self):
+        self._test_dreambooth("boft", train_text_encoder=True)
+
 
 class DreamBoothLoRASDXL(TestCase):
     def _test_dreambooth_lora_sdxl(self, train_text_encoder=False):