From 72e8079787b8a45bfb13ed9b0333d790a198505f Mon Sep 17 00:00:00 2001 From: Aaron Stannard Date: Sat, 23 May 2026 21:26:28 +0000 Subject: [PATCH 1/2] fix(providers): restore Copilot OAuth by reverting to allowlisted OAuth App + editor headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #1075's b3d64d3f swapped the Copilot client ID from the Neovim plugin's OAuth App (Iv1.b507a08c87ecfe98) to a Netclaw-owned GitHub App (Iv23lipIurKdMkbqy6nH). The new App was rejected at api.github.com/copilot_internal/v2/token with HTTP 403 "Resource not accessible by integration" regardless of declared permissions. Empirically verified: adding Copilot Chat AND Copilot Editor Context Read-only to the App still returned 403. The /copilot_internal/v2/token endpoint is the editor-integration surface and is gated to a small allowlist of OAuth Apps; GitHub App user-to-server tokens are not honored there. Every community Copilot client (avante.nvim, copilot.lua, CodeAlta) takes the same posture we're restoring here. Three changes on the exchange request: - Revert ClientId to Iv1.b507a08c87ecfe98 (Neovim Copilot OAuth App) - Add the editor-integration header contract — Copilot-Integration-Id is what tells GitHub's gateway to route through the Copilot permission model rather than evaluating generic App scopes - Switch Authorization scheme from `token` to `Bearer` to match VS Code, Neovim, JetBrains, and CodeAlta reference implementations The chat-path's hardcoded `Netclaw/1.0` editor-version is also corrected to use BuildInfo.Version dynamically, and the exchanger defers User-Agent to NetclawHeadersHandler (already wired via .AddNetclawHeaders("copilot-token")) rather than clobbering the canonical UA with its own hardcode. See #1157 for the diagnosis and follow-up options (apply for an OAuth App allowlist entry, or migrate to the documented Copilot SDK pathway). Refs #1157 --- .../CopilotTokenExchangerTests.cs | 28 ++++++++++++++++--- .../GitHubCopilot/CopilotTokenExchanger.cs | 24 ++++++++++++---- .../GitHubCopilot/GitHubCopilotDescriptor.cs | 13 +++++++-- 3 files changed, 53 insertions(+), 12 deletions(-) diff --git a/src/Netclaw.Daemon.Tests/Providers/GitHubCopilot/CopilotTokenExchangerTests.cs b/src/Netclaw.Daemon.Tests/Providers/GitHubCopilot/CopilotTokenExchangerTests.cs index c317f6f2c..0188524b6 100644 --- a/src/Netclaw.Daemon.Tests/Providers/GitHubCopilot/CopilotTokenExchangerTests.cs +++ b/src/Netclaw.Daemon.Tests/Providers/GitHubCopilot/CopilotTokenExchangerTests.cs @@ -121,8 +121,8 @@ await exchanger.GetTokenAsync(EntryWithOAuth("oauth-B"), await exchanger.GetTokenAsync(EntryWithOAuth("oauth-A"), TestContext.Current.CancellationToken); - Assert.Equal(1, requestsByToken["token oauth-A"]); - Assert.Equal(1, requestsByToken["token oauth-B"]); + Assert.Equal(1, requestsByToken["Bearer oauth-A"]); + Assert.Equal(1, requestsByToken["Bearer oauth-B"]); } [Fact] @@ -173,8 +173,14 @@ await Assert.ThrowsAsync(() => } [Fact] - public async Task GetToken_TokenRequest_UsesTokenSchemeAndJsonAccept() + public async Task GetToken_TokenRequest_SendsEditorIntegrationContract() { + // The exchange endpoint gates on the editor-integration header set — + // Copilot-Integration-Id in particular is what makes GitHub's gateway + // route through the Copilot permission model. Missing any of these + // headers returns HTTP 403 "Resource not accessible by integration" + // even with a valid OAuth-App user-to-server token. Lock this set in + // so a future cleanup pass doesn't silently regress the integration. HttpRequestMessage? captured = null; var handler = new FakeHttpMessageHandler(request => { @@ -190,8 +196,22 @@ await exchanger.GetTokenAsync(EntryWithOAuth("ghu_oauth"), Assert.NotNull(captured); Assert.Equal("https://api.github.com/copilot_internal/v2/token", captured!.RequestUri!.ToString()); - Assert.Equal("token ghu_oauth", + Assert.Equal("Bearer ghu_oauth", captured.Headers.GetValues("Authorization").Single()); Assert.Contains(captured.Headers.Accept, h => h.MediaType == "application/json"); + + // User-Agent is deliberately not asserted here — it's stamped by + // NetclawHeadersHandler on the named HttpClient, which this unit + // test bypasses with a raw FakeHttpMessageHandler. The handler + // contract is covered in NetclawHeadersHandlerTests. + Assert.False(captured.Headers.Contains("User-Agent"), + "exchanger must defer User-Agent to NetclawHeadersHandler"); + + Assert.Equal($"Netclaw/{BuildInfo.Version}", + captured.Headers.GetValues("Editor-Version").Single()); + Assert.Equal($"netclaw/{BuildInfo.Version}", + captured.Headers.GetValues("Editor-Plugin-Version").Single()); + Assert.Equal("vscode-chat", captured.Headers.GetValues("Copilot-Integration-Id").Single()); + Assert.Equal("2022-11-28", captured.Headers.GetValues("X-GitHub-Api-Version").Single()); } } diff --git a/src/Netclaw.Providers/GitHubCopilot/CopilotTokenExchanger.cs b/src/Netclaw.Providers/GitHubCopilot/CopilotTokenExchanger.cs index ce4303591..4ede9dd08 100644 --- a/src/Netclaw.Providers/GitHubCopilot/CopilotTokenExchanger.cs +++ b/src/Netclaw.Providers/GitHubCopilot/CopilotTokenExchanger.cs @@ -88,13 +88,25 @@ private async Task ExchangeAsync(string oauthToken, CancellationTok { using var request = new HttpRequestMessage(HttpMethod.Get, TokenEndpoint); - // GitHub's /copilot_internal/v2/token expects the OAuth token under - // the "token" auth scheme (its personal-access-token convention), - // NOT "Bearer". The Copilot chat endpoint then uses "Bearer" with - // the short-lived token returned here. - request.Headers.TryAddWithoutValidation("Authorization", $"token {oauthToken}"); + // The exchange endpoint requires the full editor-integration header + // contract — Copilot-Integration-Id is what tells GitHub's gateway + // "route through the Copilot permission model, not the generic + // GitHub App permission model." Without it, even an allowlisted + // OAuth App's user-to-server token gets evaluated against generic + // App scopes and rejected with HTTP 403 "Resource not accessible by + // integration." Cross-checked against CodeAlta's CopilotDirectAuth + // and the Neovim Copilot plugin source; both send the same set. + // + // User-Agent is intentionally NOT set here — the named HttpClient + // ("CopilotTokenExchange") runs through NetclawHeadersHandler, which + // stamps the canonical UA + X-Netclaw-Component. Setting it here + // would win over the handler and clobber the shared identity. + request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", oauthToken); request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json")); - request.Headers.UserAgent.ParseAdd("netclaw/1.0"); + request.Headers.TryAddWithoutValidation("Editor-Version", $"Netclaw/{BuildInfo.Version}"); + request.Headers.TryAddWithoutValidation("Editor-Plugin-Version", $"netclaw/{BuildInfo.Version}"); + request.Headers.TryAddWithoutValidation("Copilot-Integration-Id", "vscode-chat"); + request.Headers.TryAddWithoutValidation("X-GitHub-Api-Version", "2022-11-28"); using var response = await httpClient.SendAsync(request, ct); var body = await response.Content.ReadAsStringAsync(ct); diff --git a/src/Netclaw.Providers/GitHubCopilot/GitHubCopilotDescriptor.cs b/src/Netclaw.Providers/GitHubCopilot/GitHubCopilotDescriptor.cs index 52ee81ef8..86faf4a4d 100644 --- a/src/Netclaw.Providers/GitHubCopilot/GitHubCopilotDescriptor.cs +++ b/src/Netclaw.Providers/GitHubCopilot/GitHubCopilotDescriptor.cs @@ -30,7 +30,16 @@ public sealed class GitHubCopilotDescriptor( TokenEndpoint = new Uri("https://github.com/login/oauth/access_token"), DeviceEndpoint = new Uri("https://github.com/login/device/code"), - ClientId = "Iv23lipIurKdMkbqy6nH", + // OAuth App client_id borrowed from the Neovim Copilot plugin. The + // /copilot_internal/v2/token exchange endpoint is gated to a small + // allowlist of editor-integration OAuth Apps (VS Code, Neovim, + // JetBrains, gh CLI); a Netclaw-owned GitHub App was rejected with + // HTTP 403 "Resource not accessible by integration" regardless of + // configured permissions. Every community Copilot client (avante.nvim, + // copilot.lua, CodeAlta) takes the same posture. Replace if/when + // Netclaw gets its own OAuth App allowlisted by GitHub, or when we + // migrate to the documented Copilot SDK pathway. + ClientId = "Iv1.b507a08c87ecfe98", Scope = "read:user", UseProprietaryDeviceFlow = false, }; @@ -111,7 +120,7 @@ private static Action ApplyCopilotRequestHeaders(string copi { request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", copilotToken); request.Headers.TryAddWithoutValidation("copilot-integration-id", "vscode-chat"); - request.Headers.TryAddWithoutValidation("editor-version", "Netclaw/1.0"); + request.Headers.TryAddWithoutValidation("editor-version", $"Netclaw/{BuildInfo.Version}"); request.Headers.TryAddWithoutValidation("openai-intent", "conversation-agent"); }; From 2ff7723f8d1e22726aa7727f330ce41700250b43 Mon Sep 17 00:00:00 2001 From: Aaron Stannard Date: Sat, 23 May 2026 21:26:44 +0000 Subject: [PATCH 2/2] fix(sessions): surface HTTP / transport failure detail from LlmFailureClassifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The classifier collapsed every non-ProviderException, non-overflow, non-timeout failure into the generic "I encountered an error processing your message. Please try again." string, throwing away the underlying HttpRequestException detail. A user hitting an unreachable provider (connection refused, DNS failure, raw HttpRequestException from SDKs like OllamaSharp that don't wrap into ProviderException) saw the same opaque message they'd get for any other failure, with no path to diagnose it from the chat surface. Adds branches for: - HttpRequestException with status code (401/403 → re-auth prompt, 429 → rate-limit naming, 5xx → server-error naming, other → status + truncated message) - HttpRequestException without status (pre-response failures: connection refused, DNS, TLS) → surfaces the transport message verbatim - Generic catch-all → exception type name + truncated message instead of swallowing entirely All forwarded messages are truncated to 200 chars so a chat window never receives a multi-KB blob. Raw response bodies, headers, tokens, and stack traces remain off-limits — the curated path through ProviderException.UserMessage is still preferred when the provider populates it. 12 new test cases covering each branch plus exception unwrapping through outer wrappers (matches how Akka pipelines surface inner causes). --- .../Sessions/LlmFailureClassifierTests.cs | 148 ++++++++++++++++++ .../Sessions/LlmFailureClassifier.cs | 47 +++++- 2 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 src/Netclaw.Actors.Tests/Sessions/LlmFailureClassifierTests.cs diff --git a/src/Netclaw.Actors.Tests/Sessions/LlmFailureClassifierTests.cs b/src/Netclaw.Actors.Tests/Sessions/LlmFailureClassifierTests.cs new file mode 100644 index 000000000..a52b42597 --- /dev/null +++ b/src/Netclaw.Actors.Tests/Sessions/LlmFailureClassifierTests.cs @@ -0,0 +1,148 @@ +// ----------------------------------------------------------------------- +// +// Copyright (C) 2026 - 2026 Petabridge, LLC +// +// ----------------------------------------------------------------------- +using System.Net; +using System.Net.Http; +using Netclaw.Actors.Sessions; +using Netclaw.Configuration; +using Xunit; + +namespace Netclaw.Actors.Tests.Sessions; + +public class LlmFailureClassifierTests +{ + private static readonly ModelCapabilities Model = new() + { + ModelId = "test-model", + ContextWindowTokens = 8000, + }; + + [Fact] + public void NullCause_ReturnsGenericMessage() + { + var message = LlmFailureClassifier.ExtractUserMessage(null, Model); + + Assert.Equal("I encountered an error processing your message. Please try again.", message); + } + + [Fact] + public void ProviderException_UsesProviderUserMessage() + { + // Provider-curated messages bypass our heuristics — the provider + // already decided what's safe to surface. + var ex = new ProviderException("Custom provider message", "internal detail", statusCode: 500); + + var message = LlmFailureClassifier.ExtractUserMessage(ex, Model); + + Assert.Equal("Custom provider message", message); + } + + [Fact] + public void TimeoutException_GetsTimeoutMessage() + { + var message = LlmFailureClassifier.ExtractUserMessage(new TimeoutException("idle"), Model); + + Assert.Contains("timed out", message, StringComparison.OrdinalIgnoreCase); + } + + [Theory] + [InlineData(HttpStatusCode.Unauthorized, "401")] + [InlineData(HttpStatusCode.Forbidden, "403")] + public void HttpRequestException_AuthStatus_PromptsReauth(HttpStatusCode status, string statusText) + { + var ex = new HttpRequestException("auth rejected", inner: null, statusCode: status); + + var message = LlmFailureClassifier.ExtractUserMessage(ex, Model); + + Assert.Contains(statusText, message); + Assert.Contains("netclaw provider add", message); + } + + [Fact] + public void HttpRequestException_429_IsNamedAsRateLimit() + { + var ex = new HttpRequestException("too many", inner: null, statusCode: HttpStatusCode.TooManyRequests); + + var message = LlmFailureClassifier.ExtractUserMessage(ex, Model); + + Assert.Contains("rate-limited", message, StringComparison.OrdinalIgnoreCase); + Assert.Contains("429", message); + } + + [Fact] + public void HttpRequestException_5xx_IsNamedAsServerError() + { + var ex = new HttpRequestException("upstream offline", inner: null, + statusCode: HttpStatusCode.BadGateway); + + var message = LlmFailureClassifier.ExtractUserMessage(ex, Model); + + Assert.Contains("server error", message, StringComparison.OrdinalIgnoreCase); + Assert.Contains("502", message); + } + + [Fact] + public void HttpRequestException_NullStatus_SurfacesTransportDetail() + { + // This is the case the user actually hit — OllamaSharp threw + // HttpRequestException("Connection refused (localhost:11434)") with + // no StatusCode, and the old classifier swallowed it. + var ex = new HttpRequestException("Connection refused (localhost:11434)"); + + var message = LlmFailureClassifier.ExtractUserMessage(ex, Model); + + Assert.Contains("transport error", message, StringComparison.OrdinalIgnoreCase); + Assert.Contains("Connection refused (localhost:11434)", message); + } + + [Fact] + public void HttpRequestException_NestedInInvocationException_IsStillUnwrapped() + { + // Akka and task-based pipelines often wrap the real cause in + // outer exceptions; the classifier must walk the chain. + var inner = new HttpRequestException("Connection refused (host:1234)"); + var wrapped = new InvalidOperationException("LLM call failed", inner); + + var message = LlmFailureClassifier.ExtractUserMessage(wrapped, Model); + + Assert.Contains("transport error", message, StringComparison.OrdinalIgnoreCase); + Assert.Contains("Connection refused (host:1234)", message); + } + + [Fact] + public void UnknownException_SurfacesTypeAndTruncatedMessage() + { + var ex = new InvalidOperationException("something specific went wrong"); + + var message = LlmFailureClassifier.ExtractUserMessage(ex, Model); + + Assert.Contains("InvalidOperationException", message); + Assert.Contains("something specific went wrong", message); + } + + [Fact] + public void UnknownException_LongMessage_GetsTruncated() + { + var longMessage = new string('x', 1000); + var ex = new InvalidOperationException(longMessage); + + var message = LlmFailureClassifier.ExtractUserMessage(ex, Model); + + Assert.True(message.Length < 500, + $"forwarded message length should be capped; got {message.Length}"); + Assert.EndsWith("…", message); + } + + [Fact] + public void ContextOverflow_NamesTheModelAndContextWindow() + { + var ex = new InvalidOperationException("prompt is too long for the context"); + + var message = LlmFailureClassifier.ExtractUserMessage(ex, Model); + + Assert.Contains("test-model", message); + Assert.Contains("8000", message); + } +} diff --git a/src/Netclaw.Actors/Sessions/LlmFailureClassifier.cs b/src/Netclaw.Actors/Sessions/LlmFailureClassifier.cs index 376374062..6a3d8795e 100644 --- a/src/Netclaw.Actors/Sessions/LlmFailureClassifier.cs +++ b/src/Netclaw.Actors/Sessions/LlmFailureClassifier.cs @@ -10,11 +10,21 @@ namespace Netclaw.Actors.Sessions; internal static class LlmFailureClassifier { + // Truncation cap on raw HTTP / exception messages we forward to the UI. + // The provider's HttpRequestException.Message is normally short ("Connection + // refused (host:port)") but SDK-wrapped exceptions can be much longer and + // sometimes embed echoed request fragments. Cap so a chat window never gets + // a 2KB error blob. + private const int MaxForwardedMessageLength = 200; + public static string ExtractUserMessage(Exception? cause, ModelCapabilities model) { if (cause is null) - return "I encountered an error processing your message. Please try again."; + return GenericFailureMessage; + // ProviderException already carries a user-safe message — provider + // transport layers (OpenAiCompatibleChatClient etc.) curate this so + // we don't have to. var providerEx = FindException(cause); if (providerEx is not null) return providerEx.UserMessage; @@ -25,9 +35,42 @@ public static string ExtractUserMessage(Exception? cause, ModelCapabilities mode if (cause is TimeoutException) return "The LLM response stream timed out due to inactivity. The model may be overloaded or the context too large. Please try again."; - return "I encountered an error processing your message. Please try again."; + // Raw HTTP transport failure not wrapped in ProviderException — common + // when SDKs like OllamaSharp throw HttpRequestException directly, or + // when the request never reaches a provider (DNS / connection refused). + // The exception's StatusCode is null for pre-response failures and + // populated for response-derived ones. + var httpEx = FindException(cause); + if (httpEx is not null) + return FormatHttpFailure(httpEx); + + // Final catch-all. Surfacing the exception type + a truncated message + // beats the historical "please try again" wall — at minimum the + // operator sees what kind of failure it was. + return $"Unexpected LLM provider error ({cause.GetType().Name}): {Truncate(cause.Message)}"; } + private const string GenericFailureMessage = + "I encountered an error processing your message. Please try again."; + + private static string FormatHttpFailure(HttpRequestException ex) + { + var status = (int?)ex.StatusCode; + return status switch + { + 401 or 403 => $"LLM provider rejected the request (HTTP {status}): authentication failed or revoked. Re-run 'netclaw provider add ...' or check stored credentials.", + 429 => "LLM provider rate-limited the request (HTTP 429). Wait a moment and try again.", + >= 500 => $"LLM provider returned a server error (HTTP {status}). The provider may be overloaded. Please try again.", + not null => $"LLM provider returned HTTP {status}: {Truncate(ex.Message)}", + null => $"LLM provider transport error: {Truncate(ex.Message)}. Check provider configuration and connectivity.", + }; + } + + private static string Truncate(string? s) => + string.IsNullOrEmpty(s) ? string.Empty : + s.Length <= MaxForwardedMessageLength ? s : + s[..MaxForwardedMessageLength] + "…"; + public static bool IsContextOverflow(Exception? ex) { if (ex is null)