Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions src/Build.UnitTests/BackEnd/MSBuildClient_Tests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

#nullable enable

using System;
using System.IO;
using System.Threading;
using Microsoft.Build.Experimental;
using Microsoft.Build.UnitTests;
using Shouldly;
using Xunit;

namespace Microsoft.Build.UnitTests.BackEnd
{
/// <summary>
/// Covers how <see cref="MSBuildClient"/> reports failures that should let the host fall back
/// to an in-process build.
/// </summary>
public class MSBuildClient_Tests
{
    private readonly ITestOutputHelper _output;

    public MSBuildClient_Tests(ITestOutputHelper output) => _output = output;

    /// <summary>
    /// Points the client at an msbuild executable that does not exist, so the server cannot be
    /// launched, and verifies that <see cref="MSBuildClient.Execute"/> reports the failure
    /// through <see cref="MSBuildClientExitResult"/> instead of throwing. Callers such as
    /// <c>MSBuildClientApp</c> rely on that result to fall back to in-process execution.
    /// </summary>
    /// <remarks>
    /// Regression coverage for the .NET 10.0.300 / Aspire timeout: with
    /// <c>DOTNET_CLI_USE_MSBUILD_SERVER=true</c> honoured but the server child unable to start
    /// (e.g. the apphost can't find the .NET runtime), a <see cref="System.TimeoutException"/>
    /// from <c>NamedPipeClientStream.Connect</c> used to escape past <c>MSBuildClientApp</c>
    /// and crash the CLI. The call to <c>Execute</c> below is deliberately not wrapped in a
    /// try/catch: an escaping exception fails the test.
    /// </remarks>
    [Fact]
    public void Execute_WithUnreachableServer_DoesNotPropagateException()
    {
        // Shield the test from any real MSBuild server / DOTNET_CLI_USE_MSBUILD_SERVER state
        // on the dev or CI machine; a warm server left by another test/run could be picked up
        // by the named-mutex check in MSBuildClient and make the assertions non-deterministic.
        using (TestEnvironment testEnvironment = TestEnvironment.Create(_output))
        {
            testEnvironment.SetEnvironmentVariable("DOTNET_CLI_USE_MSBUILD_SERVER", null);
            testEnvironment.SetEnvironmentVariable("MSBUILDUSESERVER", null);

            // A freshly generated directory name guarantees the MSBuild path cannot exist.
            string uniqueDirectoryName = "msbuildclient-tests-" + Guid.NewGuid().ToString("N");
            string missingMSBuildPath = Path.Combine(Path.GetTempPath(), uniqueDirectoryName, "MSBuild.dll");

            var client = new MSBuildClient(new[] { "dummy.proj" }, missingMSBuildPath);

            // Primary contract under test: this call must return rather than throw. xUnit
            // reports the offending stack if an exception escapes.
            MSBuildClientExitResult exitResult = client.Execute(CancellationToken.None);

            exitResult.ShouldNotBeNull();

            // Only the recoverable failure kinds are acceptable. ServerBusy is left out on
            // purpose: it is the "another client is racing for the launch mutex" outcome, so
            // accepting it here would hide a misclassified failure.
            exitResult.MSBuildClientExitType.ShouldBeOneOf(
                MSBuildClientExitType.LaunchError,
                MSBuildClientExitType.UnableToConnect,
                MSBuildClientExitType.UnknownServerState);

            // No server child was launched successfully, so no exit code should have been
            // recorded. (Once issue #13718 lands and the diagnostic helper is plumbed through
            // every connect failure, this lets MSBuildClientApp choose the generic
            // "server unavailable" message over the "crashed with exit code N" one.)
            exitResult.ServerProcessExitCode.ShouldBeNull();
        }
    }
}
}
105 changes: 100 additions & 5 deletions src/Build/BackEnd/Client/MSBuildClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ public sealed class MSBuildClient
/// </summary>
private MSBuildClientPacketPump _packetPump = null!;

/// <summary>
/// PID of the server process this client launched (or null if no launch was attempted /
/// the server was already running). Used for diagnostics on connection failure.
/// </summary>
/// <remarks>
/// Assigned in <c>TryLaunchServer</c> right after the server process is started and read by
/// <c>LogConnectFailureDiagnostics</c> to look the process up again by ID.
/// NOTE(review): only the numeric ID is retained, not the <c>Process</c> object, so a later
/// lookup could in principle observe an unrelated process if the OS has reused the ID —
/// confirm this is acceptable for diagnostics-only use.
/// </remarks>
private int? _launchedServerPid;

/// <summary>
/// Public constructor with parameters.
/// </summary>
Expand Down Expand Up @@ -459,8 +465,19 @@ private bool TryLaunchServer()
];
NodeLauncher nodeLauncher = new NodeLauncher();
CommunicationsUtilities.Trace("Starting Server...");
using Process msbuildProcess = nodeLauncher.Start(new NodeLaunchData(_msbuildLocation, string.Join(" ", msBuildServerOptions)), nodeId: 0);
CommunicationsUtilities.Trace($"Server started with PID: {msbuildProcess?.Id}");

// Set DOTNET_ROOT so the apphost server child can locate the runtime; this
// override is replaced by the client's environment on the first build command
// (see OutOfProcServerNode.HandleServerNodeBuildCommand → SetEnvironment).
// The `!` works around dotnet/msbuild#13761.
NodeLaunchData launchData = new(
MSBuildLocation: _msbuildLocation,
CommandLineArgs: string.Join(" ", msBuildServerOptions),
EnvironmentOverrides: DotnetHostEnvironmentHelper.CreateDotnetRootEnvironmentOverrides()!);

using Process msbuildProcess = nodeLauncher.Start(launchData, nodeId: 0);
_launchedServerPid = msbuildProcess.Id;
CommunicationsUtilities.Trace($"Server started with PID: {_launchedServerPid}");
}
catch (Exception ex)
{
Expand Down Expand Up @@ -605,9 +622,36 @@ private bool TryConnectToServer(int timeoutMilliseconds)
{
tryAgain = false;

HandshakeResult result;
bool connected;
try
{
connected = NodeProviderOutOfProcBase.TryConnectToPipeStream(
_nodeStream, _pipeName, _handshake, Math.Max(1, timeoutMilliseconds - (int)sw.ElapsedMilliseconds), out result);
}
catch (TimeoutException)
{
// The underlying NamedPipeClientStream.Connect throws TimeoutException when the
// pipe never becomes available — typically because the server child process
// failed to start (e.g. apphost couldn't locate the runtime). Treat this as a
// recoverable connection failure so MSBuildClientApp can fall back to in-proc
// execution rather than crashing the whole CLI.
LogConnectFailureDiagnostics(timeoutMilliseconds, isTimeout: true, errorMessage: null);
_exitResult.MSBuildClientExitType = MSBuildClientExitType.UnableToConnect;
return false;
}
catch (Exception ex) when (!ExceptionHandling.IsCriticalException(ex))
{
// Mirror the exception-tolerant behavior of NodeProviderOutOfProcBase.TryConnectToProcess
// so any non-critical failure (UnauthorizedAccessException, IOException,
// InvalidOperationException, etc.) routes through the standard fallback path
// rather than escaping out of MSBuildClient.Execute.
LogConnectFailureDiagnostics(timeoutMilliseconds, isTimeout: false, errorMessage: ex.Message);
_exitResult.MSBuildClientExitType = MSBuildClientExitType.UnableToConnect;
return false;
}

if (NodeProviderOutOfProcBase.TryConnectToPipeStream(
_nodeStream, _pipeName, _handshake, Math.Max(1, timeoutMilliseconds - (int)sw.ElapsedMilliseconds), out HandshakeResult result))
if (connected)
{
return true;
}
Expand All @@ -623,7 +667,7 @@ private bool TryConnectToServer(int timeoutMilliseconds)
}
else
{
CommunicationsUtilities.Trace($"Failed to connect to server: {result.ErrorMessage}");
LogConnectFailureDiagnostics(timeoutMilliseconds, isTimeout: result.Status is HandshakeStatus.Timeout, errorMessage: result.ErrorMessage);
_exitResult.MSBuildClientExitType = MSBuildClientExitType.UnableToConnect;
return false;
}
Expand All @@ -633,6 +677,57 @@ private bool TryConnectToServer(int timeoutMilliseconds)
return false;
}

/// <summary>
/// Emits a single diagnostic trace entry describing why connection to the MSBuild server
/// failed, including the launched server PID (if any) and its current state. This makes
/// the otherwise-opaque 20s timeout actionable when MSBUILDDEBUGCOMM tracing is enabled.
/// Also populates <see cref="MSBuildClientExitResult.ServerProcessExitCode"/> when the
/// launched server child has already exited, so the host can surface that fact to the
/// user-visible "falling back to in-proc" message instead of a generic timeout.
/// </summary>
/// <remarks>
/// This helper is best-effort and is invoked from inside the exception handlers of
/// <c>TryConnectToServer</c>, so it must not itself let a non-critical exception escape:
/// doing so would replace the recoverable <c>UnableToConnect</c> result with an unhandled
/// exception. Any non-critical failure while querying the process is therefore reported as
/// "state unavailable" in the trace.
/// </remarks>
/// <param name="timeoutMilliseconds">Connection timeout that was in effect; only used in the trace text.</param>
/// <param name="isTimeout"><c>true</c> to describe the failure as a pipe-connect timeout; <c>false</c> to report <paramref name="errorMessage"/>.</param>
/// <param name="errorMessage">Error text included in the trace when <paramref name="isTimeout"/> is <c>false</c>; may be <c>null</c>.</param>
private void LogConnectFailureDiagnostics(int timeoutMilliseconds, bool isTimeout, string? errorMessage)
{
    string serverState;
    if (_launchedServerPid is int pid)
    {
        try
        {
            using Process launched = Process.GetProcessById(pid);
            if (launched.HasExited)
            {
                // Read the exit code once so the result and the trace cannot disagree.
                int exitCode = launched.ExitCode;
                _exitResult.ServerProcessExitCode = exitCode;
                serverState = $"PID {pid} (already exited with code {exitCode})";
            }
            else
            {
                serverState = $"PID {pid} (still running)";
            }
        }
        catch (ArgumentException)
        {
            // Process already terminated and was reaped before we could query it.
            serverState = $"PID {pid} (already exited)";
        }
        catch (Exception ex) when (!ExceptionHandling.IsCriticalException(ex))
        {
            // Process.HasExited / Process.ExitCode can throw more than InvalidOperationException
            // (e.g. Win32Exception when the exit code cannot be retrieved, NotSupportedException).
            // Diagnostics must never turn a recoverable connect failure into a crash.
            serverState = $"PID {pid} (state unavailable)";
        }
    }
    else
    {
        serverState = "no launch attempted (server reported as already running)";
    }

    string reason = isTimeout
        ? $"timed out after {timeoutMilliseconds} ms waiting for the named pipe"
        : $"connection error: {errorMessage}";

    CommunicationsUtilities.Trace(
        $"MSBuild server connection failed ({reason}). Launched server: {serverState}. " +
        "Falling back to in-proc build. " +
        "If the server child process exited immediately, ensure DOTNET_ROOT is set correctly so the apphost can locate the .NET runtime.");
}

private void WritePacket(Stream nodeStream, INodePacket packet)
{
MemoryStream memoryStream = _packetMemoryStream;
Expand Down
9 changes: 9 additions & 0 deletions src/Build/BackEnd/Client/MSBuildClientExitResult.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,14 @@ public sealed class MSBuildClientExitResult
/// This field is null if MSBuild client execution was not successful.
/// </summary>
public string? MSBuildAppExitTypeString { get; set; }

/// <summary>
/// When this client launched a server child process and that process had already exited
/// by the time we observed the connection failure, this is its exit code. <c>null</c>
/// otherwise (server still running, never launched, or its state could not be queried).
/// Hosts use this to surface "server crashed immediately on launch" to the user instead
/// of a generic timeout message.
/// </summary>
/// <remarks>
/// A <c>null</c> value only means no exit code was observed; it does not by itself indicate
/// that a server process is running.
/// </remarks>
public int? ServerProcessExitCode { get; set; }
}
}
39 changes: 38 additions & 1 deletion src/MSBuild/MSBuildClientApp.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Globalization;
using System.Threading;
using Microsoft.Build.Experimental;
using Microsoft.Build.Framework.Telemetry;
Expand Down Expand Up @@ -70,7 +71,18 @@ public static MSBuildApp.ExitType Execute(string[] commandLineArgs, string msbui
KnownTelemetry.PartialBuildTelemetry.ServerFallbackReason = exitResult.MSBuildClientExitType.ToString();
}

// Server is busy, fallback to old behavior.
// Surface a single user-visible message on stderr when the failure is something
// other than the well-understood "another client is racing us for the launch
// mutex" case. Without this the user sees no indication that MSBuild Server was
// requested but unavailable; previously a connection timeout would even crash
// the process (the DOTNET_CLI_USE_MSBUILD_SERVER=true regression in 10.0.300).
if (exitResult.MSBuildClientExitType != MSBuildClientExitType.ServerBusy)
{
string detail = GetServerFallbackDetail(exitResult);
Console.Error.WriteLine(ResourceUtilities.FormatResourceStringStripCodeAndKeyword("MSBuildServerUnavailable", detail));
Comment thread
JanProvaznik marked this conversation as resolved.
}

// Server is busy / unavailable, fallback to old behavior.
return MSBuildApp.Execute(commandLineArgs);
}

Expand All @@ -84,5 +96,30 @@ public static MSBuildApp.ExitType Execute(string[] commandLineArgs, string msbui

return MSBuildApp.ExitType.MSBuildClientFailure;
}

/// <summary>
/// Chooses the localized sub-message that explains why the MSBuild server could not be used,
/// for insertion into the user-visible fallback notice. When the connection failed and the
/// launched server's exit code is known, the "crashed on launch" wording wins over the
/// generic connect-failure wording.
/// </summary>
/// <param name="exitResult">Result returned by the MSBuild client for the failed attempt.</param>
/// <returns>The formatted resource string for the matching failure category.</returns>
private static string GetServerFallbackDetail(MSBuildClientExitResult exitResult)
{
    MSBuildClientExitType exitType = exitResult.MSBuildClientExitType;

    if (exitType == MSBuildClientExitType.LaunchError)
    {
        return ResourceUtilities.FormatResourceStringStripCodeAndKeyword("MSBuildServerLaunchError");
    }

    if (exitType == MSBuildClientExitType.UnknownServerState)
    {
        return ResourceUtilities.FormatResourceStringStripCodeAndKeyword("MSBuildServerStateUnknown");
    }

    if (exitType == MSBuildClientExitType.UnableToConnect && exitResult.ServerProcessExitCode is int exitCode)
    {
        // The exit code is rendered culture-invariantly before being substituted.
        return ResourceUtilities.FormatResourceStringStripCodeAndKeyword(
            "MSBuildServerCrashedOnLaunch",
            exitCode.ToString(CultureInfo.InvariantCulture));
    }

    // Fallthrough: UnableToConnect with no known exit code, or any other exit type the
    // caller forwards here.
    return ResourceUtilities.FormatResourceStringStripCodeAndKeyword("MSBuildServerConnectFailed");
}
}
}
23 changes: 23 additions & 0 deletions src/MSBuild/Resources/Strings.resx
Comment thread
rainersigwald marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -1736,6 +1736,29 @@
</data>
<!-- **** TaskHost strings end **** -->

<!-- **** MSBuild Server fallback strings begin **** -->
<data name="MSBuildServerUnavailable" xml:space="preserve">
<value>MSBuild server unavailable: {0}. Falling back to an in-process build.</value>
<comment>LOCALIZATION: {0} is a sub-message describing why the MSBuild server could not be used. It is one of MSBuildServerCrashedOnLaunch / MSBuildServerLaunchError / MSBuildServerStateUnknown / MSBuildServerConnectFailed. The outer template supplies the trailing period after {0}; sub-messages must NOT end with a period or other sentence-final punctuation.</comment>
</data>
<data name="MSBuildServerCrashedOnLaunch" xml:space="preserve">
<value>the server process exited with code {0} immediately after launch</value>
<comment>LOCALIZATION: {0} is the integer process exit code; rendered with InvariantCulture as a decimal integer, do not localize the digits. This message is concatenated mid-sentence after a colon in MSBuildServerUnavailable: lower-case start in English is intentional and the message must NOT end with a period (the outer template adds it).</comment>
</data>
<data name="MSBuildServerLaunchError" xml:space="preserve">
<value>the server process could not be launched</value>
<comment>LOCALIZATION: No format arguments. Concatenated mid-sentence after a colon in MSBuildServerUnavailable: lower-case start in English is intentional and the message must NOT end with a period (the outer template adds it).</comment>
</data>
<data name="MSBuildServerStateUnknown" xml:space="preserve">
<value>the current server state could not be determined</value>
<comment>LOCALIZATION: No format arguments. Concatenated mid-sentence after a colon in MSBuildServerUnavailable: lower-case start in English is intentional and the message must NOT end with a period (the outer template adds it).</comment>
</data>
<data name="MSBuildServerConnectFailed" xml:space="preserve">
<value>could not connect to the server within the timeout window; the server may have failed to start</value>
<comment>LOCALIZATION: No format arguments. Used for any connect failure when the server's exit code is not known &#8212; covers both pipe-connect timeouts and non-timeout I/O errors during the connect attempt. Concatenated mid-sentence after a colon in MSBuildServerUnavailable: lower-case start in English is intentional and the message must NOT end with a period (the outer template adds it).</comment>
</data>
<!-- **** MSBuild Server fallback strings end **** -->

<!--
The command line message bucket is: MSB1001 - MSB1999

Expand Down
Loading
Loading