From 73c913a9a6f0d535e31b3c52be828991fd80dc90 Mon Sep 17 00:00:00 2001 From: Aaron Stannard Date: Sat, 23 May 2026 19:50:24 +0000 Subject: [PATCH 1/3] fix(providers): route startup capability detection through composite resolver (#987) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vLLM's backend strategy returns (modalities: null, context: N) by design, expecting downstream resolvers to fill the null modality fields. The runtime CompositeCapabilityResolver does exactly that — first non-null per field across the chain, oracles always run. But at daemon startup, ResolveStartupCapabilities ran a hand-rolled three-step sequence (Ollama -> OpenAI-compatible -> OpenRouter) that returned immediately on the first non-null result. vLLM returned the context window, startup treated that as success, and HuggingFace was never queried. Result: vision-capable models served via vLLM (e.g. Qwen/Qwen3.6-35B-A3B-FP8) advertised text-only modality, forcing users to hand-edit InputModalities in netclaw.json — edits that the model picker subsequently wipes (sibling pain in #1127). Rewire startup to build the same resolver list and hand it to CompositeCapabilityResolver, which already implements (and tests) the correct merge / timeout / per-resolver error semantics. The HF resolver is added unconditionally as an oracle so it fills modality fields the provider-specific resolver left null. Verified against http://spark-362c:8000 (live vLLM): /v1/models returns Qwen/Qwen3.6-35B-A3B-FP8 + max_model_len=262144, and HF returns pipeline_tag=image-text-to-text for that exact id — which the existing MapPipelineTag already converts to (Text|Image, Text). No HF resolver or normalizer changes needed; the missing piece was strictly the startup wiring. Existing CompositeCapabilityResolverTests.PartialResults_MergeAcrossResolvers already covers the vLLM+HF handoff in unit form, so no new test added. 
Out of scope (deferred follow-ups): - HF /api/models?search fuzzy fallback for friendly vLLM aliases like 'qwen36-ultimate' (other half of #987). - Config schema refactor (#1127) — likely moot once auto-detection works reliably. --- src/Netclaw.Daemon/Program.cs | 76 ++++++++++++++++------------------- 1 file changed, 35 insertions(+), 41 deletions(-) diff --git a/src/Netclaw.Daemon/Program.cs b/src/Netclaw.Daemon/Program.cs index 7f19587d6..db33a5788 100644 --- a/src/Netclaw.Daemon/Program.cs +++ b/src/Netclaw.Daemon/Program.cs @@ -1051,10 +1051,15 @@ static void ConfigureDaemonServices( } /// -/// One-time capability detection at startup. Creates temporary HTTP resources -/// to query the hosting provider (Ollama) or OpenRouter public catalog before -/// the DI container is built. -/// Returns null if detection fails (caller falls back to text-only). +/// One-time capability detection at startup. Builds a resolver chain +/// (provider-specific resolver + oracles) and merges partial results +/// across them via — first +/// non-null per field wins. This is what lets a vLLM probe supply the +/// context window while HuggingFace fills in the modality flags it +/// intentionally left null. Runs before the DI container is built, so +/// resolvers are instantiated by hand with a shared transient +/// . Returns null if every resolver in the chain +/// produced nothing (caller falls back to text-only). /// static ResolvedModelCapabilities? ResolveStartupCapabilities( string modelId, LogLevel logLevel, string? providerType, string? ollamaEndpoint, string? openAiCompatibleEndpoint, string? 
openAiCompatibleApiKey) @@ -1065,57 +1070,46 @@ static void ConfigureDaemonServices( using var loggerFactory = LoggerFactory.Create(b => b.SetMinimumLevel(logLevel)); var logger = loggerFactory.CreateLogger("Netclaw.Startup"); - // Provider-first: try Ollama /api/show when running against an Ollama backend + var resolvers = new List(); + + // Provider-specific resolver — narrow but authoritative for the + // active backend's local quirks (Ollama's /api/show, vLLM's + // max_model_len, llama.cpp's /props). Each typically supplies only + // a subset of fields — modalities or context window, rarely both. if (providerType?.Equals("ollama", StringComparison.OrdinalIgnoreCase) == true && ollamaEndpoint is not null) { - var ollamaResolver = new OllamaCapabilityResolver( - httpClient, loggerFactory.CreateLogger(), ollamaEndpoint); - var ollamaResult = ollamaResolver.ResolveAsync(modelId, CancellationToken.None) - .GetAwaiter().GetResult(); - - if (ollamaResult is not null) - { - logger.LogInformation( - "Auto-detected model capabilities for {ModelId}: input={Input}, output={Output}, context_window={ContextWindow}", - modelId, - ollamaResult.InputModalities?.ToString() ?? "unknown", - ollamaResult.OutputModalities?.ToString() ?? "unknown", - ollamaResult.ContextWindowTokens?.ToString() ?? 
"unknown"); - return ollamaResult; - } + resolvers.Add(new OllamaCapabilityResolver( + httpClient, loggerFactory.CreateLogger(), ollamaEndpoint)); } - - if (providerType?.Equals("openai-compatible", StringComparison.OrdinalIgnoreCase) == true + else if (providerType?.Equals("openai-compatible", StringComparison.OrdinalIgnoreCase) == true && openAiCompatibleEndpoint is not null) { - var openAiCompatibleResolver = new OpenAiCompatibleCapabilityResolver( + resolvers.Add(new OpenAiCompatibleCapabilityResolver( httpClient, loggerFactory.CreateLogger(), openAiCompatibleEndpoint, - openAiCompatibleApiKey); - var openAiCompatibleResult = openAiCompatibleResolver.ResolveAsync(modelId, CancellationToken.None) - .GetAwaiter().GetResult(); - - if (openAiCompatibleResult is not null) - { - logger.LogInformation( - "Auto-detected model capabilities for {ModelId}: input={Input}, output={Output}, context_window={ContextWindow}", - modelId, - openAiCompatibleResult.InputModalities?.ToString() ?? "unknown", - openAiCompatibleResult.OutputModalities?.ToString() ?? "unknown", - openAiCompatibleResult.ContextWindowTokens?.ToString() ?? "unknown"); - return openAiCompatibleResult; - } + openAiCompatibleApiKey)); } - // Fallback: OpenRouter public catalog (works for models from any provider) + // Oracles — always queried regardless of provider type. The + // composite's merge picks up modality / context-window fields the + // provider-specific resolver left null. OpenRouter covers + // commercial models; HuggingFace covers open-source weights that + // self-hosted backends (vLLM, llama.cpp) serve under HF-shaped ids. 
var openRouterDescriptor = new OpenRouterDescriptor(httpClient); var registry = new ProviderDescriptorRegistry([openRouterDescriptor]); - var resolver = new OpenRouterOracleResolver( - httpClient, loggerFactory.CreateLogger(), registry); + resolvers.Add(new OpenRouterOracleResolver( + httpClient, loggerFactory.CreateLogger(), registry)); + resolvers.Add(new HuggingFaceCapabilityResolver( + httpClient, loggerFactory.CreateLogger())); + + var composite = new CompositeCapabilityResolver( + resolvers, + loggerFactory.CreateLogger(), + activeProviderForModel: _ => providerType); - var result = resolver.ResolveAsync(modelId, CancellationToken.None) + var result = composite.ResolveAsync(modelId, CancellationToken.None) .GetAwaiter().GetResult(); if (result is not null) From 92e2c6df52de4ae4c721a4ed7b0dbdc78e88e6b8 Mon Sep 17 00:00:00 2001 From: Aaron Stannard Date: Sat, 23 May 2026 20:16:20 +0000 Subject: [PATCH 2/3] refactor(providers): lazy ModelCapabilities factory via DI'd resolver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix routed startup capability detection through a composite resolver, but it still built a parallel resolver chain — its own HttpClient, its own (provider-less, log-swallowing) LoggerFactory, and its own copies of the same four resolvers that ConfigureDaemonServices registers properly in DI 400-ish lines later. Two copies of the same chain meant two opportunities to drift, no visibility into per-resolver Debug output, and an artificial cache (the AddSingleton snapshot at line 404) that existed only because detection had to run before Build(). Remove the duplication. 
ModelCapabilities is now a singleton factory that, on first resolve, asks the host's IModelCapabilityResolver (the runtime composite, already wired with HttpClientFactory-managed clients, model -> provider scoping including Compaction, and proper per-resolver loggers) and emits its log line through ILoggerFactory so output lands in whatever providers the host has configured. Eager resolve after Build() preserves startup-time timing — same pattern already used at line 204 for DaemonStartClock. Net effect: - single source of truth for the resolver chain - per-resolver Debug logs become visible (no more invisible LoggerFactory) - ~85 lines of duplicate wiring deleted - no behavior change for consumers — ModelCapabilities is still a singleton, resolved once, held for daemon lifetime --- src/Netclaw.Daemon/Program.cs | 131 ++++++++++------------------------ 1 file changed, 37 insertions(+), 94 deletions(-) diff --git a/src/Netclaw.Daemon/Program.cs b/src/Netclaw.Daemon/Program.cs index db33a5788..2bc907cc4 100644 --- a/src/Netclaw.Daemon/Program.cs +++ b/src/Netclaw.Daemon/Program.cs @@ -203,6 +203,12 @@ static async Task RunDaemonAsync(string[] args, DaemonRestartSignal restartSigna // Eagerly resolve so StartedAt reflects daemon startup, not first request. app.Services.GetRequiredService(); + // Eagerly resolve so capability auto-detection (HF / OpenRouter / provider + // probes) runs at startup, not on first session creation — preserving the + // timing of the previous eager-resolution path while letting detection use + // the host's IModelCapabilityResolver chain and ILoggerFactory. + app.Services.GetRequiredService(); + app.UseAuthentication(); app.UseAuthorization(); app.UseRateLimiter(); @@ -374,9 +380,12 @@ static void ConfigureDaemonServices( .Get() ?? new ModelSelection(); services.AddSingleton(models); - // Auto-detect model capabilities when not manually specified in config. - // Provider-first resolution: query the hosting provider (e.g. 
Ollama /api/show) - // before falling back to external oracles (OpenRouter, HuggingFace). + // Auto-detect model capabilities via the runtime IModelCapabilityResolver + // chain (registered further down). Lazy factory so detection runs against + // the real DI-wired resolvers with the host's logger — no temp HttpClient + // / LoggerFactory needed, and per-resolver Debug output is visible. The + // factory is invoked eagerly after Build() (see RunDaemonAsync) so timing + // matches a startup-bound resolution rather than first-session lazy hit. var providers = configuration.GetSection("Providers") .Get>() ?? new() { ["local-ollama"] = new ProviderEntry() }; @@ -397,11 +406,32 @@ static void ConfigureDaemonServices( ? mainProvider?.ApiKey?.Value : null; - var detected = ResolveStartupCapabilities( - models.Main.ModelId, daemonLogLevel, mainProviderType, ollamaEndpoint, openAiCompatibleEndpoint, openAiCompatibleApiKey); + services.AddSingleton(sp => + { + var resolver = sp.GetRequiredService(); + var logger = sp.GetRequiredService().CreateLogger("Netclaw.Startup"); - var modelCapabilities = ModelCapabilityResolution.ResolveModelCapabilities(models, detected); - services.AddSingleton(modelCapabilities); + var detected = resolver.ResolveAsync(models.Main.ModelId, CancellationToken.None) + .GetAwaiter().GetResult(); + + if (detected is not null) + { + logger.LogInformation( + "Auto-detected model capabilities for {ModelId}: input={Input}, output={Output}, context_window={ContextWindow}", + models.Main.ModelId, + detected.InputModalities?.ToString() ?? "unknown", + detected.OutputModalities?.ToString() ?? "unknown", + detected.ContextWindowTokens?.ToString() ?? 
"unknown"); + } + else + { + logger.LogInformation( + "Model {ModelId} not found in capability oracles; defaulting to text-only", + models.Main.ModelId); + } + + return ModelCapabilityResolution.ResolveModelCapabilities(models, detected); + }); // Session config: bind operator-facing settings from config section var sessionConfig = SessionConfig.BindFromConfiguration(configuration.GetSection("Session")); @@ -1050,93 +1080,6 @@ static void ConfigureDaemonServices( } } -/// -/// One-time capability detection at startup. Builds a resolver chain -/// (provider-specific resolver + oracles) and merges partial results -/// across them via — first -/// non-null per field wins. This is what lets a vLLM probe supply the -/// context window while HuggingFace fills in the modality flags it -/// intentionally left null. Runs before the DI container is built, so -/// resolvers are instantiated by hand with a shared transient -/// . Returns null if every resolver in the chain -/// produced nothing (caller falls back to text-only). -/// -static ResolvedModelCapabilities? ResolveStartupCapabilities( - string modelId, LogLevel logLevel, string? providerType, string? ollamaEndpoint, string? openAiCompatibleEndpoint, string? openAiCompatibleApiKey) -{ - try - { - using var httpClient = new HttpClient { Timeout = TimeSpan.FromSeconds(10) }; - using var loggerFactory = LoggerFactory.Create(b => b.SetMinimumLevel(logLevel)); - var logger = loggerFactory.CreateLogger("Netclaw.Startup"); - - var resolvers = new List(); - - // Provider-specific resolver — narrow but authoritative for the - // active backend's local quirks (Ollama's /api/show, vLLM's - // max_model_len, llama.cpp's /props). Each typically supplies only - // a subset of fields — modalities or context window, rarely both. 
- if (providerType?.Equals("ollama", StringComparison.OrdinalIgnoreCase) == true - && ollamaEndpoint is not null) - { - resolvers.Add(new OllamaCapabilityResolver( - httpClient, loggerFactory.CreateLogger(), ollamaEndpoint)); - } - else if (providerType?.Equals("openai-compatible", StringComparison.OrdinalIgnoreCase) == true - && openAiCompatibleEndpoint is not null) - { - resolvers.Add(new OpenAiCompatibleCapabilityResolver( - httpClient, - loggerFactory.CreateLogger(), - openAiCompatibleEndpoint, - openAiCompatibleApiKey)); - } - - // Oracles — always queried regardless of provider type. The - // composite's merge picks up modality / context-window fields the - // provider-specific resolver left null. OpenRouter covers - // commercial models; HuggingFace covers open-source weights that - // self-hosted backends (vLLM, llama.cpp) serve under HF-shaped ids. - var openRouterDescriptor = new OpenRouterDescriptor(httpClient); - var registry = new ProviderDescriptorRegistry([openRouterDescriptor]); - resolvers.Add(new OpenRouterOracleResolver( - httpClient, loggerFactory.CreateLogger(), registry)); - resolvers.Add(new HuggingFaceCapabilityResolver( - httpClient, loggerFactory.CreateLogger())); - - var composite = new CompositeCapabilityResolver( - resolvers, - loggerFactory.CreateLogger(), - activeProviderForModel: _ => providerType); - - var result = composite.ResolveAsync(modelId, CancellationToken.None) - .GetAwaiter().GetResult(); - - if (result is not null) - { - logger.LogInformation( - "Auto-detected model capabilities for {ModelId}: input={Input}, output={Output}, context_window={ContextWindow}", - modelId, - result.InputModalities?.ToString() ?? "unknown", - result.OutputModalities?.ToString() ?? "unknown", - result.ContextWindowTokens?.ToString() ?? 
"unknown"); - } - else - { - logger.LogInformation( - "Model {ModelId} not found in capability oracles; defaulting to text-only", - modelId); - } - - return result; - } - catch - { - // Startup capability detection is best-effort — don't crash the daemon - return null; - } -} - /// /// Copies built-in system skills from the daemon's embedded resources into /// build output as BuiltInSkills/{skill-name}/SKILL.md (with companion files). From e868ff483658100c76fac313c8211e8a10af2143 Mon Sep 17 00:00:00 2001 From: Aaron Stannard Date: Sat, 23 May 2026 20:27:18 +0000 Subject: [PATCH 3/3] fix(scripts): detach daemon stdio in swap-daemon.sh so it doesn't hang Process.Start in DaemonManager.Start inherits the parent's stdio handles into the daemon child. When swap-daemon.sh ran netclaw daemon start the script blocked forever waiting for the long-running daemon to close those inherited pipes. Redirect stdin/stdout/stderr to /dev/null at the call site so the pipes close as soon as netclaw daemon start returns. --- scripts/swap-daemon.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/swap-daemon.sh b/scripts/swap-daemon.sh index 0182a21e6..060880abb 100755 --- a/scripts/swap-daemon.sh +++ b/scripts/swap-daemon.sh @@ -37,7 +37,10 @@ start_daemon() { echo "Starting netclaw.service via systemd..." systemctl --user start netclaw.service else - netclaw daemon start + # Detach stdio. The CLI's Process.Start inherits the parent's stdio + # handles into the daemon child, so without this redirect the script + # hangs forever waiting for the daemon to close the inherited pipes. + netclaw daemon start </dev/null >/dev/null 2>&1 fi }