Skip to content
19 changes: 19 additions & 0 deletions studio/backend/routes/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,7 @@ async def load_model(
else:
if (
backend.active_model_name
and not backend.active_model_name.lower().endswith(".gguf")
and backend.active_model_name.lower() == model_identifier.lower()
):
logger.info(
Expand Down Expand Up @@ -573,6 +574,24 @@ async def load_model(
chat_template = _chat_template,
)

# ── Local GGUF already-loaded: kill before reload ──────────
# When a settings change triggers a reload of a local GGUF model
# (no gguf_variant), kill the existing llama-server so the GGUF
# reload path below can start fresh with new params. Without this,
# the reload falls through to the transformers path, which fails
# because GGUF files don't have a HuggingFace config.json.
if (
not request.gguf_variant
and llama_backend.is_loaded
and llama_backend.model_identifier
and llama_backend.model_identifier.lower() == model_identifier.lower()
):
Comment on lines +584 to +588

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Defer local GGUF unload until validations pass

This preemptive llama_backend.unload_model() runs before request validation, so a bad reload request can terminate a healthy serving model and then return an error. For example, when the same local GGUF is loaded and the caller sends gpu_ids, this branch unloads first, then the GGUF path rejects the request with 400 (gpu_ids is not supported for GGUF models yet), leaving inference with no model loaded. Please move this unload until after request/config validation that can fail.

Useful? React with 👍 / 👎.

logger.info(
f"Local GGUF already loaded, reloading with updated settings: "
f"{model_log_label}"
)
llama_backend.unload_model()

# Create config using clean factory method
# is_lora is auto-detected from adapter_config.json on disk/HF
config = ModelConfig.from_identifier(
Expand Down