vellum-ai · siddseethepalli · Apr 17, 2026 · Apr 17, 2026 · chatgpt-codex-connector · Apr 17, 2026
diff --git a/clients/macos/vellum-assistant/Features/Settings/InferenceServiceCard.swift b/clients/macos/vellum-assistant/Features/Settings/InferenceServiceCard.swift
@@ -507,16 +507,21 @@ struct InferenceServiceCard: View {
         // force-persist provider/model even when IDs happen to match.
         let modeChanged = draftMode != store.inferenceMode
 
-        // Persist mode if changed
-        let pendingMode = modeChanged ? store.setInferenceMode(draftMode) : nil
+        // Persist mode if changed. The mode write is independent of the
+        // provider/model PATCH below — the daemon stores it under
+        // `services.inference.mode`, not `llm.default` — so it can fly
+        // off in parallel without affecting atomicity.
+        if modeChanged {
+            _ = store.setInferenceMode(draftMode)
+        }
 
-        // Persist provider if changed. Also re-persist when the mode
-        // changed — switching between managed and your-own implies a
-        // provider change even if the resolved provider ID happens to
-        // match initialProvider (ensures config stays consistent).
+        // Resolve the provider that will land in `llm.default.provider`.
+        // Also flag re-persist when the mode changed — switching between
+        // managed and your-own implies a provider change even if the
+        // resolved provider ID happens to match initialProvider (ensures
+        // config stays consistent).
         let persistProvider = draftMode == "managed" ? "anthropic" : draftProvider
         let providerChanged = persistProvider != initialProvider || modeChanged
-        let pendingProvider = providerChanged ? store.setLLMDefaultProvider(persistProvider) : nil
         if providerChanged {
             initialProvider = persistProvider
         }
@@ -540,20 +545,18 @@ struct InferenceServiceCard: View {
             })
         }
 
-        // Await the mode and provider patches before writing the model so the
-        // daemon's read-modify-write cycle for the model doesn't overwrite them.
-        store.selectedModel = draftModel
-        let capturedModel = draftModel
-        let saveProvider = draftMode == "managed" ? "anthropic" : draftProvider
-        let forceSend = modeChanged
-        Task {
-            if let pendingMode { _ = await pendingMode.value }
-            if let pendingProvider { _ = await pendingProvider.value }
-            _ = await store.setLLMDefaultModel(
-                capturedModel,
-                provider: saveProvider,
-                force: forceSend
-            ).value
+        // Persist provider+model atomically in a single PATCH when either
+        // changed (or when the mode toggled, which forces a re-persist
+        // even when the resolved IDs match). Splitting the write into two
+        // PATCHes (provider first, model second) lets the daemon's
+        // ConfigWatcher fire between them and reload providers with the
+        // new provider but the OLD model — potentially incompatible
+        // (e.g. an OpenAI model ID against the Anthropic provider). The
+        // combined setter writes both keys in one round-trip so the
+        // daemon never observes a half-applied state.
+        let modelChanged = draftModel != initialModel
+        if providerChanged || modelChanged {
+            _ = store.setLLMDefault(provider: persistProvider, model: draftModel)
         }
         initialModel = draftModel
     }

diff --git a/clients/macos/vellum-assistant/Features/Settings/SettingsStore.swift b/clients/macos/vellum-assistant/Features/Settings/SettingsStore.swift
@@ -2983,6 +2983,13 @@ public final class SettingsStore: ObservableObject {
     /// the unified `llm.default.provider` key. This is the canonical write
     /// path now that the workspace migration consolidates LLM call-site
     /// settings under `llm.*` (see PR 4 of the unify-llm-callsites plan).
+    ///
+    /// Prefer `setLLMDefault(provider:model:)` when both keys change together
+    /// — it writes them atomically in a single PATCH so the daemon's
+    /// `ConfigWatcher` cannot observe a half-applied state with the new
+    /// provider but the old (potentially incompatible) model. This single-key
+    /// variant is kept for future flows that legitimately want to change just
+    /// the provider without touching the model.
     @discardableResult
     func setLLMDefaultProvider(_ provider: String) -> Task<Bool, Never> {
         selectedInferenceProvider = provider
@@ -3002,6 +3009,14 @@ public final class SettingsStore: ObservableObject {
     /// Persists the default LLM provider+model pair under `llm.default`.
     /// Both keys are written together so the daemon's read-modify-write cycle
     /// observes a consistent pair.
+    ///
+    /// Prefer `setLLMDefault(provider:model:)` when both keys are being
+    /// changed in response to a single user action — it skips the dedup
+    /// gating below and writes a clean atomic PATCH. This entry point is
+    /// retained for flows that want the dedup-aware "only patch if the
+    /// resolved pair actually moved" semantics, e.g. routing-source refresh
+    /// pushing the daemon's authoritative selection back through the same
+    /// helper.
     @discardableResult
     func setLLMDefaultModel(
         _ model: String,
@@ -3036,6 +3051,39 @@ public final class SettingsStore: ObservableObject {
         return task
     }
 
+    /// Persists the default LLM provider+model pair atomically in a single
+    /// `llm.default` PATCH. Use this whenever both keys are being changed in
+    /// response to a single user action (e.g. the inference settings save).
+    ///
+    /// Splitting the write into two PATCH requests (provider first, model
+    /// second) gives the daemon's `ConfigWatcher` a window to fire on the
+    /// provider-only change and reload providers with the OLD model — which
+    /// may not exist in the new provider's catalog (Anthropic provider trying
+    /// to use an OpenAI model ID, etc.). Writing both keys in one PATCH
+    /// closes that window.
+    @discardableResult
+    func setLLMDefault(provider: String, model: String) -> Task<Bool, Never> {
+        lastDaemonModel = model
+        lastDaemonProvider = provider
+        selectedModel = model
+        selectedInferenceProvider = provider
+        let task = Task {
+            let success = await settingsClient.patchConfig([
+                "llm": ["default": ["provider": provider, "model": model]]
+            ])
+            if !success {
+                log.error("Failed to patch config for llm.default.{provider,model}")
+                if lastDaemonModel == model {
+                    lastDaemonModel = nil
+                    lastDaemonProvider = nil
+                }
+            }
+            return success
+        }
+        scheduleRoutingSourceRefresh()
+        return task
+    }
+
     // MARK: - Per-Call-Site Override Read / Write
 
     /// Number of entries in `callSiteOverrides` that have at least one

diff --git a/clients/macos/vellum-assistantTests/SettingsStoreInferenceTests.swift b/clients/macos/vellum-assistantTests/SettingsStoreInferenceTests.swift
@@ -5,7 +5,7 @@ import XCTest
 /// Verifies that `SettingsStore` reads inference provider/model from the
 /// unified `llm.default.*` keys (with a fallback to `services.inference.*`
 /// for unmigrated configs) and writes through the new
-/// `setLLMDefaultProvider` / `setLLMDefaultModel` APIs.
+/// `setLLMDefaultProvider` / `setLLMDefaultModel` / `setLLMDefault` APIs.
 @MainActor
 final class SettingsStoreInferenceTests: XCTestCase {
 
@@ -223,6 +223,48 @@ final class SettingsStoreInferenceTests: XCTestCase {
         XCTAssertEqual(store.selectedInferenceProvider, "openai")
     }
 
+    // MARK: - Write path: setLLMDefault (atomic provider+model)
+
+    /// The combined `setLLMDefault(provider:model:)` setter must emit a
+    /// single PATCH carrying both keys. Splitting the write into two
+    /// PATCHes (provider first, model second) lets the daemon's
+    /// `ConfigWatcher` fire between them and reload providers with the
+    /// new provider but the OLD (potentially incompatible) model — that
+    /// race is exactly what this combined helper exists to close.
+    func testSetLLMDefaultEmitsSingleAtomicPatch() {
+        _ = store.setLLMDefault(provider: "openai", model: "gpt-4.1")
+        waitForPatchCount(1)
+
+        XCTAssertEqual(
+            mockSettingsClient.patchConfigCalls.count,
+            1,
+            "setLLMDefault must persist provider+model in a single PATCH"
+        )
+
+        let patch = lastLLMDefaultPatch()
+        XCTAssertNotNil(patch, "expected an llm.default patch payload")
+        XCTAssertEqual(patch?["provider"] as? String, "openai")
+        XCTAssertEqual(patch?["model"] as? String, "gpt-4.1")
+    }
+
+    func testSetLLMDefaultDoesNotEmitServicesInferencePatch() {
+        _ = store.setLLMDefault(provider: "openai", model: "gpt-4.1")
+        waitForPatchCount(1)
+
+        XCTAssertNil(
+            lastServicesInferencePatch(),
+            "setLLMDefault must not write to services.inference.*"
+        )
+    }
+
+    func testSetLLMDefaultUpdatesSelectedState() {
+        _ = store.setLLMDefault(provider: "openai", model: "gpt-4.1")
+        waitForPatchCount(1)
+
+        XCTAssertEqual(store.selectedModel, "gpt-4.1")
+        XCTAssertEqual(store.selectedInferenceProvider, "openai")
+    }
+
     // MARK: - Legacy setInferenceMode is preserved
 
     /// `setInferenceMode` continues to write to `services.inference.mode` —