Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 12 additions & 14 deletions src/axolotl/utils/schemas/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,10 +369,10 @@ def check_fp8_config(cls, data):
"see speed improvements. Please consider setting `torch_compile: "
"true` in your config."
)
fsdp_config = data.get("fsdp_config") or {}
if data.get("fp8") and (
data.get("fsdp_config", {}).get("activation_checkpointing", False) is True
or data.get("fsdp_config", {}).get("fsdp_activation_checkpointing", False)
is True
fsdp_config.get("activation_checkpointing", False) is True
or fsdp_config.get("fsdp_activation_checkpointing", False) is True
):
LOG.warning(
"FP8 + FSDP2 + activation checkpointing may be slower than BF16 "
Expand Down Expand Up @@ -817,13 +817,13 @@ def check_fsdp2_base_model_quant_rl(cls, data):
@model_validator(mode="before")
@classmethod
def check_fsdp_version_in_fsdp_config(cls, data):
if data.get("fsdp_config"):
if data.get("fsdp_config", {}).get("fsdp_version"):
LOG.warning(
"Configuring `fsdp_version` in `fsdp_config` is deprecated. "
"Please configure `fsdp_version` as a top-level field."
)
data["fsdp_version"] = data.get("fsdp_config").pop("fsdp_version")
fsdp_config = data.get("fsdp_config") or {}
if fsdp_config and fsdp_config.get("fsdp_version"):
LOG.warning(
"Configuring `fsdp_version` in `fsdp_config` is deprecated. "
"Please configure `fsdp_version` as a top-level field."
)
data["fsdp_version"] = fsdp_config.pop("fsdp_version")
Comment on lines +820 to +826

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Avoid silently overriding top-level fsdp_version when both are set; handle conflicts and reduce log spam

Current code promotes the nested value unconditionally, which can overwrite an explicitly-set top-level fsdp_version without warning if both are present and differ. Also, switch to warning_once to avoid repeated logs across multiple validations.

Proposed update:

  • If both top-level and nested fsdp_version exist and differ, keep the top-level and drop the nested with a warning about the conflict.
  • Otherwise, promote nested to top-level and pop it from fsdp_config.
  • Keep the None-safe pattern.
-        fsdp_config = data.get("fsdp_config") or {}
-        if fsdp_config and fsdp_config.get("fsdp_version"):
-            LOG.warning(
-                "Configuring `fsdp_version` in `fsdp_config` is deprecated. "
-                "Please configure `fsdp_version` as a top-level field."
-            )
-            data["fsdp_version"] = fsdp_config.pop("fsdp_version")
+        fsdp_config = data.get("fsdp_config") or {}
+        nested_version = fsdp_config.get("fsdp_version")
+        if nested_version is not None:
+            existing = data.get("fsdp_version")
+            if existing is not None and str(existing) != str(nested_version):
+                LOG.warning(
+                    "Conflicting `fsdp_version`: top-level=%s, fsdp_config=%s; "
+                    "keeping top-level and dropping nested.",
+                    existing,
+                    nested_version,
+                )
+                fsdp_config.pop("fsdp_version", None)
+            else:
+                LOG.warning_once(
+                    "Configuring `fsdp_version` in `fsdp_config` is deprecated. "
+                    "Please configure `fsdp_version` as a top-level field."
+                )
+                data["fsdp_version"] = fsdp_config.pop("fsdp_version")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
fsdp_config = data.get("fsdp_config") or {}
if fsdp_config and fsdp_config.get("fsdp_version"):
LOG.warning(
"Configuring `fsdp_version` in `fsdp_config` is deprecated. "
"Please configure `fsdp_version` as a top-level field."
)
data["fsdp_version"] = fsdp_config.pop("fsdp_version")
fsdp_config = data.get("fsdp_config") or {}
nested_version = fsdp_config.get("fsdp_version")
if nested_version is not None:
existing = data.get("fsdp_version")
if existing is not None and str(existing) != str(nested_version):
LOG.warning(
"Conflicting `fsdp_version`: top-level=%s, fsdp_config=%s; "
"keeping top-level and dropping nested.",
existing,
nested_version,
)
fsdp_config.pop("fsdp_version", None)
else:
LOG.warning_once(
"Configuring `fsdp_version` in `fsdp_config` is deprecated. "
"Please configure `fsdp_version` as a top-level field."
)
data["fsdp_version"] = fsdp_config.pop("fsdp_version")
🤖 Prompt for AI Agents
In src/axolotl/utils/schemas/validation.py around lines 820 to 826, the current
code unconditionally promotes a nested fsdp_version into the top-level which can
silently overwrite an explicit top-level value and spam logs; change the logic
to: read fsdp_config safely, if nested fsdp_version exists then check if data
already has a top-level fsdp_version — if both exist and differ, keep the
top-level value, pop the nested fsdp_config["fsdp_version"], and emit a single
warning_once describing the conflict and that the nested value was ignored;
otherwise (no top-level present) promote the nested value to
data["fsdp_version"], pop it from fsdp_config, and use warning_once for the
deprecation message when promoting.

return data

@model_validator(mode="before")
Expand Down Expand Up @@ -1151,10 +1151,8 @@ def check_gptq_w_revision(cls, data):
@classmethod
def check_gpt_oss_fsdp_loading(cls, data):
if data.get("model_quantization_config", "") == "Mxfp4Config":
if (
data.get("fsdp_config", {}).get("cpu_ram_efficient_loading", False)
is True
):
fsdp_config = data.get("fsdp_config") or {}
if fsdp_config.get("cpu_ram_efficient_loading", False) is True:
raise ValueError(
Comment thread
coderabbitai[bot] marked this conversation as resolved.
"FSDP cpu_ram_efficient_loading is not supported for Mxfp4Config model quantization."
)
Expand Down