Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
aa438ce
feat: inject local model provider into recipe jobs via JWT
wasimysaid Apr 3, 2026
0beffea
feat: auto-generate JWT for local model providers in recipes
wasimysaid Apr 3, 2026
19d657a
feat: add is_local flag to model provider config types and utils
wasimysaid Apr 3, 2026
694d3c4
fix(studio): skip endpoint validation for local providers
wasimysaid Apr 3, 2026
b4e9ff6
feat(studio): add local/external model source toggle to provider dialog
wasimysaid Apr 3, 2026
8028495
feat(studio): thread localProviderNames through model config dialog c…
wasimysaid Apr 3, 2026
754eb54
feat(studio): show 'Local model (Chat)' label for local model_provide…
wasimysaid Apr 3, 2026
b351ffa
fix: hardcode loopback for local endpoint, clear stale creds on toggle
wasimysaid Apr 3, 2026
c83b3db
fix: document TOCTOU/JWT rotation, add deferred import comments, fix …
wasimysaid Apr 3, 2026
b3ae2dd
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 3, 2026
100ba0b
Merge branch 'main' into feat/local-model-provider
wasimysaid Apr 7, 2026
4c997e9
fix(studio): clear stale local model state on provider toggle and val…
wasimysaid Apr 7, 2026
1b19db5
fix(studio): override empty local endpoint in validation and skip mod…
wasimysaid Apr 7, 2026
10f1fea
fix(studio): resolve loopback port from app.state, clear stale local …
Apr 8, 2026
1259ebd
fix(studio): narrow store cascade types, sync model placeholder on gr…
Apr 8, 2026
1cbc8a3
fix(studio): strict is_local check, narrow loaded-model gate to LLM-r…
Apr 8, 2026
d21b077
fix(studio): force skip_health_check on local-linked configs, skip JS…
Apr 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 163 additions & 1 deletion studio/backend/routes/data_recipe/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@

from __future__ import annotations

from datetime import timedelta
from typing import Any
from urllib.parse import urlparse

from fastapi import APIRouter, HTTPException, Query, Request
from fastapi.responses import JSONResponse, StreamingResponse
Expand All @@ -26,6 +28,161 @@
router = APIRouter()


def _resolve_local_v1_endpoint(request: Request) -> str:
    """Return the loopback ``/v1`` URL for the actual backend listen port.

    Resolution order:
      1. ``app.state.server_port`` - explicitly published by run.py after
         the uvicorn server has bound. This is the most reliable source
         because it survives reverse proxies, TLS terminators and tunnels.
      2. ``request.scope["server"]`` - the real (host, port) tuple uvicorn
         sets when the request is dispatched. Used when Studio is started
         outside ``run_server`` (e.g. ``uvicorn studio.backend.main:app``).
      3. ``request.base_url`` parsed - last resort for test fixtures that
         do not route through a live uvicorn server.

    If none of the sources yields a usable port, 8888 is used.

    Args:
        request: The incoming request; only ``app.state``, ``scope`` and
            ``base_url`` are read.

    Returns:
        A URL of the form ``http://127.0.0.1:<port>/v1``.
    """

    def _is_usable_port(value: Any) -> bool:
        # bool is a subclass of int, so a stray True would otherwise be
        # accepted as port 1; ports outside 1..65535 cannot be connected to.
        return (
            isinstance(value, int)
            and not isinstance(value, bool)
            and 0 < value <= 65535
        )

    port: Any = getattr(request.app.state, "server_port", None)
    if not _is_usable_port(port):
        server = request.scope.get("server")
        if (
            isinstance(server, tuple)
            and len(server) >= 2
            and _is_usable_port(server[1])
        ):
            port = server[1]
        else:
            parsed_port = urlparse(str(request.base_url)).port
            port = parsed_port if _is_usable_port(parsed_port) else 8888
    return f"http://127.0.0.1:{port}/v1"


def _used_llm_model_aliases(recipe: dict[str, Any]) -> set[str]:
    """Return the model aliases that are actually referenced by an LLM column.

    Used to narrow the "Chat model loaded" gate so that orphan model_config
    nodes on the canvas do not block unrelated recipe runs.

    The ``llm-`` prefix matches the existing convention in
    ``core/data_recipe/service.py::_recipe_has_llm_columns`` and covers all
    LLM column types emitted by the frontend (llm-text, llm-code,
    llm-structured, llm-judge).

    Args:
        recipe: The recipe payload; only its ``"columns"`` entry is read.

    Returns:
        The non-empty string ``model_alias`` values of every column whose
        ``column_type`` is a string starting with ``"llm-"``. Malformed
        entries are ignored, and a missing, null or non-list ``"columns"``
        value yields an empty set.
    """
    columns = recipe.get("columns")
    # ``.get("columns", [])`` would still return None for an explicit JSON
    # null, and iterating a scalar raises TypeError; treat both as "no columns".
    if not isinstance(columns, (list, tuple)):
        return set()

    aliases: set[str] = set()
    for column in columns:
        if not isinstance(column, dict):
            continue
        column_type = column.get("column_type")
        if not isinstance(column_type, str) or not column_type.startswith("llm-"):
            continue
        alias = column.get("model_alias")
        if isinstance(alias, str) and alias:
            aliases.add(alias)
    return aliases


def _inject_local_providers(recipe: dict[str, Any], request: Request) -> None:
    """Rewrite local model providers in ``recipe`` to point at this server.

    Mutates the recipe dict in-place: the ``is_local`` key is removed from
    every provider dict, and each provider that had ``is_local`` set to
    exactly ``True`` gets the loopback ``/v1`` endpoint, an ``api_key``
    and ``provider_type = "openai"``. Model configs that reference a local
    provider get ``skip_health_check = True``.

    Args:
        recipe: The recipe payload. ``model_providers`` and ``model_configs``
            are modified in place; a missing, null or non-list value for
            either is treated as empty rather than raising.
        request: The incoming request, used to resolve the loopback port.

    Raises:
        ValueError: If a local provider is reachable from an LLM column but
            no model is currently loaded in Chat.
    """
    providers = recipe.get("model_providers")
    # A non-list payload cannot contain provider dicts; leave it untouched
    # instead of failing with a TypeError.
    if not providers or not isinstance(providers, list):
        return

    # Collect local providers and pop is_local from ALL dicts unconditionally.
    # Strict `is True` guard so malformed payloads (is_local: 1,
    # is_local: "true") do not accidentally trigger the loopback rewrite.
    local_providers: list[dict[str, Any]] = []
    for provider in providers:
        if not isinstance(provider, dict):
            continue
        if provider.pop("is_local", None) is True:
            local_providers.append(provider)

    if not local_providers:
        return

    endpoint = _resolve_local_v1_endpoint(request)

    # Only string names take part in matching; this also keeps unhashable
    # values (lists/dicts from a malformed payload) out of the sets below.
    local_names = {
        provider["name"]
        for provider in local_providers
        if isinstance(provider.get("name"), str) and provider["name"]
    }

    raw_model_configs = recipe.get("model_configs")
    model_configs = (
        [mc for mc in raw_model_configs if isinstance(mc, dict)]
        if isinstance(raw_model_configs, list)
        else []
    )

    # Only gate on model-loaded if a local provider is actually reachable
    # from an LLM column through a model_config. Orphan model_config nodes
    # that reference a local provider but that no LLM column uses should
    # not block runs; the recipe would never call /v1 for them.
    used_aliases = _used_llm_model_aliases(recipe)
    referenced_providers = {
        mc["provider"]
        for mc in model_configs
        if isinstance(mc.get("provider"), str)
        and mc["provider"]
        and isinstance(mc.get("alias"), str)
        and mc["alias"] in used_aliases
    }

    token = ""
    if local_names & referenced_providers:
        # Verify a model is loaded.
        # NOTE: This is a point-in-time check (TOCTOU). The model could be
        # unloaded or swapped after this check but before the recipe
        # subprocess calls /v1. The inference endpoint returns a clear 400
        # in that case.
        #
        # Imports are deferred to avoid circular dependencies with
        # inference modules.
        from routes.inference import get_llama_cpp_backend
        from core.inference import get_inference_backend

        llama = get_llama_cpp_backend()
        model_loaded = llama.is_loaded
        if not model_loaded:
            backend = get_inference_backend()
            model_loaded = bool(backend.active_model_name)
        if not model_loaded:
            raise ValueError(
                "No model loaded in Chat. Load a model first, then run the recipe."
            )

        from auth.authentication import (
            create_access_token,
        )  # deferred: avoids circular import

        # Uses the "unsloth" admin subject. If the user changes their
        # password, the JWT secret rotates and this token becomes invalid
        # mid-run. Acceptable for v1 - recipes typically finish well within
        # one session.
        token = create_access_token(
            subject = "unsloth",
            expires_delta = timedelta(hours = 24),
        )

    # Defensively strip any stale "external"-only fields the frontend may
    # have left on the dict (extra_headers/extra_body/api_key_env). The UI
    # hides these inputs in local mode but the payload builder still
    # serializes them, so a previously external provider that flipped to
    # local can carry invalid JSON or rogue auth headers into the local
    # /v1 call.
    for provider in local_providers:
        provider["endpoint"] = endpoint
        provider["api_key"] = token
        provider["provider_type"] = "openai"
        provider.pop("api_key_env", None)
        provider.pop("extra_headers", None)
        provider.pop("extra_body", None)

    # Force skip_health_check on any model_config that references a local
    # provider. The local /v1/models endpoint only lists the real loaded
    # model (e.g. "unsloth/llama-3.2-1b") and not the placeholder "local"
    # that the recipe sends as the model id, so data_designer's pre-flight
    # health check would otherwise fail before the first completion call.
    # The backend route ignores the model id field in chat completions, so
    # skipping the check is safe.
    for mc in model_configs:
        provider_name = mc.get("provider")
        if isinstance(provider_name, str) and provider_name in local_names:
            mc["skip_health_check"] = True


def _normalize_run_name(value: Any) -> str | None:
if value is None:
return None
Expand All @@ -40,7 +197,7 @@ def _normalize_run_name(value: Any) -> str | None:


@router.post("/jobs", response_class = JSONResponse, response_model = JobCreateResponse)
def create_job(payload: RecipePayload):
def create_job(payload: RecipePayload, request: Request):
recipe = payload.recipe
if not recipe.get("columns"):
raise HTTPException(status_code = 400, detail = "Recipe must include columns.")
Expand All @@ -67,6 +224,11 @@ def create_job(payload: RecipePayload):
status_code = 400, detail = f"invalid run_config: {exc}"
) from exc

try:
_inject_local_providers(recipe, request)
except ValueError as exc:
raise HTTPException(status_code = 400, detail = str(exc)) from exc

mgr = get_job_manager()
try:
job_id = mgr.start(recipe = recipe, run = run)
Expand Down
16 changes: 16 additions & 0 deletions studio/backend/routes/data_recipe/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,20 @@ def _collect_validation_errors(recipe: dict[str, Any]) -> list[ValidateError]:
return errors


def _patch_local_providers(recipe: dict[str, Any]) -> None:
    """Strip is_local and fill a dummy endpoint so validation doesn't choke.

    Mutates ``recipe`` in place. The ``is_local`` key is removed from every
    provider dict; providers where it was exactly ``True`` get the
    placeholder endpoint ``"http://127.0.0.1"``, and every model config
    whose ``provider`` names such a provider gets
    ``skip_health_check = True``.

    Uses a strict `is True` check to match _inject_local_providers in
    jobs.py - malformed payloads with truthy but non-boolean is_local
    values should not be treated as local.

    Args:
        recipe: The recipe payload. A missing, null or non-list
            ``model_providers`` / ``model_configs`` value is left untouched
            rather than raising.
    """
    providers = recipe.get("model_providers")
    # ``.get(key, [])`` still returns None for an explicit JSON null, so
    # check the container type instead of relying on the default.
    if not isinstance(providers, list):
        return

    local_names: set[str] = set()
    for provider in providers:
        if not isinstance(provider, dict):
            continue
        if provider.pop("is_local", None) is True:
            provider["endpoint"] = "http://127.0.0.1"
            name = provider.get("name")
            if isinstance(name, str) and name:
                local_names.add(name)

    if not local_names:
        return

    model_configs = recipe.get("model_configs")
    if not isinstance(model_configs, list):
        return

    # Mirror the run path: the placeholder endpoint is not a reachable /v1
    # server, so a health check against a local-linked config could only fail.
    for mc in model_configs:
        if not isinstance(mc, dict):
            continue
        provider_name = mc.get("provider")
        if isinstance(provider_name, str) and provider_name in local_names:
            mc["skip_health_check"] = True
Comment on lines +81 to +82

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Align local-provider validation patch with job injection

The validation path only rewrites local providers to endpoint="http://127.0.0.1", but it does not apply the same local-mode mutations used in create_job (JWT injection and skip_health_check on linked model configs). Because validate() still calls validate_recipe(recipe), local recipes that use placeholder model IDs like "local" can fail preflight model/provider checks during “Check recipe” even though the run path succeeds after _inject_local_providers mutates the payload.

Useful? React with 👍 / 👎.



@router.post("/validate", response_model = ValidateResponse)
def validate(payload: RecipePayload) -> ValidateResponse:
recipe = payload.recipe
Expand All @@ -77,6 +91,8 @@ def validate(payload: RecipePayload) -> ValidateResponse:
errors = [ValidateError(message = "Recipe must include columns.")],
)

_patch_local_providers(recipe)

try:
validate_recipe(recipe)
except RuntimeError as exc:
Expand Down
8 changes: 8 additions & 0 deletions studio/backend/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,14 @@ def run_server(
_server = uvicorn.Server(config)
_shutdown_event = Event()

# Expose the actual bound port so request-handling code can build
# loopback URLs that point at the real backend, not whatever port a
# reverse proxy or tunnel exposed in the request URL. Only publish
# an explicit value when we know the concrete port; for ephemeral
# binds (port==0) leave it unset and let request handlers fall back
# to the ASGI request scope or request.base_url.
app.state.server_port = port if port and port > 0 else None

# Run server in a daemon thread
def _run():
asyncio.run(_server.serve())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export function renderBlockDialog(
categoryOptions: SamplerConfig[],
modelConfigAliases: string[],
modelProviderOptions: string[],
localProviderNames: Set<string>,
toolProfileAliases: string[],
datetimeOptions: string[],
onUpdate: (id: string, patch: Partial<NodeConfig>) => void,
Expand Down Expand Up @@ -109,6 +110,7 @@ export function renderBlockDialog(
<ModelConfigDialog
config={config}
providerOptions={modelProviderOptions}
localProviderNames={localProviderNames}
onUpdate={update}
/>
) : null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,21 @@ type InlineModelPatch = Partial<ModelProviderConfig> | Partial<ModelConfig>;

type InlineModelProps = {
config: ModelProviderConfig | ModelConfig;
localProviderNames?: Set<string>;
onUpdate: (patch: InlineModelPatch) => void;
};

export function InlineModel(props: InlineModelProps): ReactElement {
if (props.config.kind === "model_provider") {
if (props.config.is_local) {
return (
<div className="flex items-center gap-2 px-1 py-0.5">
<span className="text-xs font-medium text-muted-foreground">
Local model (Chat)
</span>
</div>
);
}
return (
<div className="grid gap-3 sm:grid-cols-2">
<InlineField label="Endpoint">
Expand Down Expand Up @@ -42,21 +52,40 @@ export function InlineModel(props: InlineModelProps): ReactElement {
);
}

// model_config branch - mirror the local-aware provider sync from the
// dialog path so inline edits do not leave stale "local" placeholders
// on external providers and fill the placeholder when switching to local.
const localNames = props.localProviderNames ?? new Set<string>();
const modelConfig = props.config;
const handleProviderChange = (nextProvider: string) => {
const isLocal = localNames.has(nextProvider);
if (isLocal && !modelConfig.model.trim()) {
props.onUpdate({ provider: nextProvider, model: "local" });
return;
}
if (!isLocal && modelConfig.model === "local") {
props.onUpdate({ provider: nextProvider, model: "" });
return;
}
props.onUpdate({ provider: nextProvider });
};
const isLinkedToLocal = localNames.has(modelConfig.provider);

return (
<div className="grid gap-3 sm:grid-cols-2">
<InlineField label="Provider">
<Input
className="nodrag h-8 w-full text-xs"
placeholder="provider alias"
value={props.config.provider}
onChange={(event) => props.onUpdate({ provider: event.target.value })}
value={modelConfig.provider}
onChange={(event) => handleProviderChange(event.target.value)}
/>
</InlineField>
<InlineField label="Model">
<Input
className="nodrag h-8 w-full text-xs"
placeholder="gpt-4o-mini"
value={props.config.model}
placeholder={isLinkedToLocal ? "local" : "gpt-4o-mini"}
value={modelConfig.model}
onChange={(event) => props.onUpdate({ model: event.target.value })}
/>
</InlineField>
Expand All @@ -65,7 +94,7 @@ export function InlineModel(props: InlineModelProps): ReactElement {
className="nodrag h-8 w-full text-xs"
type="number"
placeholder="0.7"
value={props.config.inference_temperature ?? ""}
value={modelConfig.inference_temperature ?? ""}
onChange={(event) =>
props.onUpdate({
// biome-ignore lint/style/useNamingConvention: api schema
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import {
Position,
useUpdateNodeInternals,
} from "@xyflow/react";
import { type ReactElement, memo, useEffect } from "react";
import { type ReactElement, memo, useEffect, useMemo } from "react";
import {
MAX_NODE_WIDTH,
MAX_NOTE_NODE_WIDTH,
Expand Down Expand Up @@ -287,6 +287,7 @@ function renderNodeBody(
config: NodeConfig | undefined,
summary: string,
updateConfig: (id: string, patch: Partial<NodeConfig>) => void,
localProviderNames: Set<string>,
): ReactElement {
if (config?.kind === "markdown_note") {
return <MarkdownPreview markdown={config.markdown} />;
Expand All @@ -300,7 +301,13 @@ function renderNodeBody(
return <InlineSampler config={config} onUpdate={onUpdate} />;
}
if (config.kind === "model_provider" || config.kind === "model_config") {
return <InlineModel config={config} onUpdate={onUpdate} />;
return (
<InlineModel
config={config}
localProviderNames={localProviderNames}
onUpdate={onUpdate}
/>
);
}
if (config.kind === "llm") {
return <InlineLlm config={config} onUpdate={onUpdate} />;
Expand Down Expand Up @@ -355,6 +362,16 @@ function RecipeGraphNodeBase({
const config = useRecipeStudioStore((state) => state.configs[id]);
const openConfig = useRecipeStudioStore((state) => state.openConfig);
const updateConfig = useRecipeStudioStore((state) => state.updateConfig);
const allConfigs = useRecipeStudioStore((state) => state.configs);

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Stop subscribing each graph node to global configs

This selector makes every RecipeGraphNodeBase subscribe to the entire configs object, so any config update (including editing a single node) invalidates all node subscriptions and rerenders the whole canvas. On larger recipes this introduces avoidable O(N) rerender churn and noticeably degrades editor responsiveness; derive local-provider names once at a higher level or use a narrower selector.

Useful? React with 👍 / 👎.

const localProviderNames = useMemo(() => {
const names = new Set<string>();
for (const cfg of Object.values(allConfigs)) {
if (cfg.kind === "model_provider" && cfg.is_local === true) {
names.add(cfg.name);
}
}
return names;
}, [allConfigs]);
const llmAuxVisible = useRecipeStudioStore(
(state) => state.llmAuxVisibility[id] ?? false,
);
Expand Down Expand Up @@ -418,7 +435,12 @@ function RecipeGraphNodeBase({
data.kind === "tool_config" ||
data.kind === "validator";
const summary = getConfigSummary(config);
const nodeBody = renderNodeBody(config, summary, updateConfig);
const nodeBody = renderNodeBody(
config,
summary,
updateConfig,
localProviderNames,
);
const canShowLlmAux =
config?.kind === "llm" &&
(Boolean(config.prompt.trim()) ||
Expand Down
Loading