Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions src/Build.UnitTests/BackEnd/MSBuildClient_Tests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.IO;
using System.Threading;
using Microsoft.Build.Experimental;
using Shouldly;
using Xunit;

namespace Microsoft.Build.Engine.UnitTests.BackEnd
{
/// <summary>
/// Verifies that <see cref="MSBuildClient"/> degrades gracefully when the MSBuild server is unreachable.
/// </summary>
/// <remarks>
/// Regression coverage for the .NET 10.0.300 / Aspire timeout: when
/// <c>DOTNET_CLI_USE_MSBUILD_SERVER=true</c> is honoured but the server child cannot start
/// (e.g. the apphost can't find the .NET runtime), <see cref="MSBuildClient.Execute"/> must
/// not propagate a <see cref="System.TimeoutException"/>. Instead it must return an exit type
/// that lets the host (<c>MSBuildClientApp</c>) fall back to in-proc execution.
/// </remarks>
public sealed class MSBuildClient_Tests
{
    /// <summary>
    /// Points the client at an msbuild executable that does not exist, so the server launch
    /// cannot succeed, and asserts that <see cref="MSBuildClient.Execute"/> returns one of the
    /// recoverable exit types (LaunchError / UnableToConnect / UnknownServerState / ServerBusy)
    /// instead of letting an exception escape.
    /// </summary>
    [Fact]
    public void Execute_WithUnreachableServer_DoesNotPropagateException()
    {
        // Arrange: a freshly generated directory name guarantees the MSBuild path is missing.
        string missingDirectory = Path.Combine(Path.GetTempPath(), "does-not-exist-" + Guid.NewGuid().ToString("N"));
        string missingMSBuildPath = Path.Combine(missingDirectory, "MSBuild.dll");
        string[] args = ["dummy.proj"];
        MSBuildClient sut = new(args, missingMSBuildPath);

        // Act: the regression fix requires that this call returns rather than throws, so that
        // MSBuildClientApp gets the chance to fall back to in-proc execution.
        MSBuildClientExitResult exitResult = sut.Execute(CancellationToken.None);

        // Assert: the exact exit type is not important, only that it is a recoverable one.
        MSBuildClientExitType[] recoverableExitTypes =
        [
            MSBuildClientExitType.LaunchError,
            MSBuildClientExitType.UnableToConnect,
            MSBuildClientExitType.UnknownServerState,
            MSBuildClientExitType.ServerBusy,
        ];
        exitResult.MSBuildClientExitType.ShouldBeOneOf(recoverableExitTypes);
    }
}
}
100 changes: 95 additions & 5 deletions src/Build/BackEnd/Client/MSBuildClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ public sealed class MSBuildClient
/// </summary>
private MSBuildClientPacketPump _packetPump = null!;

/// <summary>
/// PID of the server process this client launched (or null if no launch was attempted /
/// the server was already running). Used for diagnostics on connection failure.
/// </summary>
private int? _launchedServerPid;

/// <summary>
/// Public constructor with parameters.
/// </summary>
Expand Down Expand Up @@ -459,8 +465,21 @@ private bool TryLaunchServer()
];
NodeLauncher nodeLauncher = new NodeLauncher();
CommunicationsUtilities.Trace("Starting Server...");
using Process msbuildProcess = nodeLauncher.Start(new NodeLaunchData(_msbuildLocation, string.Join(" ", msBuildServerOptions)), nodeId: 0);
CommunicationsUtilities.Trace($"Server started with PID: {msbuildProcess?.Id}");

// Apply the same DOTNET_ROOT environment overrides we pass to worker nodes (see
// NodeProviderOutOfProc.CreateNode). When the parent MSBuild process is the
// apphost (native executable), the launched server child is also the apphost and
// needs DOTNET_ROOT to locate the runtime. Without these overrides the server
// process fails to start, the named pipe is never opened, and the client times
// out after 20s (the DOTNET_CLI_USE_MSBUILD_SERVER=true regression in 10.0.300).
NodeLaunchData launchData = new(
MSBuildLocation: _msbuildLocation,
CommandLineArgs: string.Join(" ", msBuildServerOptions),
EnvironmentOverrides: DotnetHostEnvironmentHelper.CreateDotnetRootEnvironmentOverrides()!);

using Process msbuildProcess = nodeLauncher.Start(launchData, nodeId: 0);
_launchedServerPid = msbuildProcess?.Id;
Comment thread
JanProvaznik marked this conversation as resolved.
Outdated
CommunicationsUtilities.Trace($"Server started with PID: {_launchedServerPid}");
}
catch (Exception ex)
{
Expand Down Expand Up @@ -605,9 +624,36 @@ private bool TryConnectToServer(int timeoutMilliseconds)
{
tryAgain = false;

HandshakeResult result;
bool connected;
try
{
connected = NodeProviderOutOfProcBase.TryConnectToPipeStream(
_nodeStream, _pipeName, _handshake, Math.Max(1, timeoutMilliseconds - (int)sw.ElapsedMilliseconds), out result);
}
catch (TimeoutException)
{
// The underlying NamedPipeClientStream.Connect throws TimeoutException when the
// pipe never becomes available — typically because the server child process
// failed to start (e.g. apphost couldn't locate the runtime). Treat this as a
// recoverable connection failure so MSBuildClientApp can fall back to in-proc
// execution rather than crashing the whole CLI.
LogConnectFailureDiagnostics(timeoutMilliseconds, isTimeout: true, errorMessage: null);
_exitResult.MSBuildClientExitType = MSBuildClientExitType.UnableToConnect;
return false;
}
catch (Exception ex) when (!ExceptionHandling.IsCriticalException(ex))
{
// Mirror the exception-tolerant behavior of NodeProviderOutOfProcBase.TryConnectToProcess
// so any non-critical failure (UnauthorizedAccessException, IOException,
// InvalidOperationException, etc.) routes through the standard fallback path
// rather than escaping out of MSBuildClient.Execute.
LogConnectFailureDiagnostics(timeoutMilliseconds, isTimeout: false, errorMessage: ex.Message);
_exitResult.MSBuildClientExitType = MSBuildClientExitType.UnableToConnect;
return false;
}

if (NodeProviderOutOfProcBase.TryConnectToPipeStream(
_nodeStream, _pipeName, _handshake, Math.Max(1, timeoutMilliseconds - (int)sw.ElapsedMilliseconds), out HandshakeResult result))
if (connected)
{
return true;
}
Expand All @@ -623,7 +669,7 @@ private bool TryConnectToServer(int timeoutMilliseconds)
}
else
{
CommunicationsUtilities.Trace($"Failed to connect to server: {result.ErrorMessage}");
LogConnectFailureDiagnostics(timeoutMilliseconds, isTimeout: result.Status is HandshakeStatus.Timeout, errorMessage: result.ErrorMessage);
_exitResult.MSBuildClientExitType = MSBuildClientExitType.UnableToConnect;
return false;
}
Expand All @@ -633,6 +679,50 @@ private bool TryConnectToServer(int timeoutMilliseconds)
return false;
}

/// <summary>
/// Emits a single diagnostic trace entry describing why connection to the MSBuild server
/// failed, including the launched server PID (if any) and its current state. This makes
/// the otherwise-opaque 20s timeout actionable when MSBUILDDEBUGCOMM tracing is enabled.
/// </summary>
/// <remarks>
/// This helper is best-effort and must never throw a non-critical exception: it is invoked
/// from the exception handlers on the connection path, so a failure while gathering
/// diagnostics would otherwise escape and defeat the in-proc fallback.
/// </remarks>
/// <param name="timeoutMilliseconds">The connection timeout that was in effect, reported when <paramref name="isTimeout"/> is true.</param>
/// <param name="isTimeout">True when the failure was a timeout; false when it was another connection error.</param>
/// <param name="errorMessage">The error text reported when <paramref name="isTimeout"/> is false; may be null.</param>
private void LogConnectFailureDiagnostics(int timeoutMilliseconds, bool isTimeout, string? errorMessage)
{
    string serverState;
    if (_launchedServerPid is int pid)
    {
        try
        {
            // GetProcessById never returns null; it throws ArgumentException when no process
            // with that id is running. Note that the OS may have reused the id for an
            // unrelated process, so the reported state is only a hint.
            using Process launched = Process.GetProcessById(pid);
            serverState = launched.HasExited
                ? $"PID {pid} (already exited with code {launched.ExitCode})"
                : $"PID {pid} (still running)";
        }
        catch (ArgumentException)
        {
            // Process already terminated and was reaped before we could query it.
            serverState = $"PID {pid} (already exited)";
        }
        catch (InvalidOperationException)
        {
            serverState = $"PID {pid} (state unavailable)";
        }
        catch (Exception ex) when (!ExceptionHandling.IsCriticalException(ex))
        {
            // HasExited / ExitCode can also throw Win32Exception (e.g. the exit code cannot
            // be retrieved) or NotSupportedException. Diagnostics must not turn a
            // recoverable connection failure into a crash, so report and move on.
            serverState = $"PID {pid} (state unavailable)";
        }
    }
    else
    {
        serverState = "no launch attempted (server reported as already running)";
    }

    string reason = isTimeout
        ? $"timed out after {timeoutMilliseconds} ms waiting for the named pipe"
        : $"connection error: {errorMessage}";

    CommunicationsUtilities.Trace(
        $"MSBuild server connection failed ({reason}). Launched server: {serverState}. " +
        "Falling back to in-proc build. " +
        "If the server child process exited immediately, ensure DOTNET_ROOT is set correctly so the apphost can locate the .NET runtime.");
}

private void WritePacket(Stream nodeStream, INodePacket packet)
{
MemoryStream memoryStream = _packetMemoryStream;
Expand Down
15 changes: 14 additions & 1 deletion src/MSBuild/MSBuildClientApp.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,20 @@ public static MSBuildApp.ExitType Execute(string[] commandLineArgs, string msbui
KnownTelemetry.PartialBuildTelemetry.ServerFallbackReason = exitResult.MSBuildClientExitType.ToString();
}

// Server is busy, fallback to old behavior.
// Surface a single user-visible message on stderr when the failure is something
// other than the well-understood "another client is racing us for the launch
// mutex" case. Without this the user sees no indication that MSBuild Server was
// requested but unavailable; previously a connection timeout would even crash
// the process (the DOTNET_CLI_USE_MSBUILD_SERVER=true regression in 10.0.300).
if (exitResult.MSBuildClientExitType != MSBuildClientExitType.ServerBusy)
{
string detail = exitResult.MSBuildClientExitType == MSBuildClientExitType.UnableToConnect
? "could not connect to the server (it may have failed to start; ensure DOTNET_ROOT is set so the apphost can locate the .NET runtime, or set MSBUILDDEBUGCOMM=1 for diagnostics)"
Comment thread
JanProvaznik marked this conversation as resolved.
Outdated
: exitResult.MSBuildClientExitType.ToString();
Console.Error.WriteLine($"MSBuild server unavailable ({detail}); falling back to in-proc build.");
}

// Server is busy / unavailable, fallback to old behavior.
return MSBuildApp.Execute(commandLineArgs);
}

Expand Down
Loading