diff --git a/nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py b/nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py index 7741e54c7fdb..5bfccd8624c7 100644 --- a/nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py +++ b/nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py @@ -175,6 +175,8 @@ def add_adapter(self, peft_cfgs: Union[PEFTConfig, List[PEFTConfig]]): if self.cfg.get('virtual_pipeline_model_parallel_size', None): raise ValueError('Virtual pipeline model parallel is not supported when using PEFT') + if self.cfg.optim.name == "distributed_fused_adam": + raise ValueError('distributed_fused_adam is not supported for PEFT. Please use fused_adam') if not isinstance(peft_cfgs, List): peft_cfgs = [peft_cfgs]