From 040631e3953182fe01776246df7ef9c68782bf53 Mon Sep 17 00:00:00 2001 From: Edison-A-N Date: Wed, 15 Oct 2025 16:14:36 +0800 Subject: [PATCH 1/4] feat: improve fault tolerance for MCP server connections - Add fail_fast option to control connection failure behavior - Add on_connection_error callback for custom error handling - Track failed connections for better debugging - Try connecting to each server individually for graceful degradation - Add get_connection_summary() method for connection status overview --- src/mcpadapt/core.py | 89 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 71 insertions(+), 18 deletions(-) diff --git a/src/mcpadapt/core.py b/src/mcpadapt/core.py index b16b562..d0ee1b3 100644 --- a/src/mcpadapt/core.py +++ b/src/mcpadapt/core.py @@ -187,6 +187,8 @@ def __init__( adapter: ToolAdapter, connect_timeout: int = 30, client_session_timeout_seconds: float | timedelta | None = 5, + fail_fast: bool = False, + on_connection_error: Callable[[Any, Exception], None] | None = None, ): """ Manage the MCP server / client lifecycle and expose tools adapted with the adapter. @@ -197,9 +199,14 @@ def __init__( adapter (ToolAdapter): Adapter to use to convert MCP tools call into agentic framework tools. connect_timeout (int): Connection timeout in seconds to the mcp server (default is 30s). client_session_timeout_seconds: Timeout for MCP ClientSession calls + fail_fast (bool): If True, any connection failure will cause the entire adapter to fail. + If False, failed connections are skipped and only successful connections are used. + Default is False for better fault tolerance. + on_connection_error: Optional callback function called when a connection fails. + Receives (server_params, exception) as arguments. Raises: - TimeoutError: When the connection to the mcp server time out. + TimeoutError: When the connection to the mcp server times out and fail_fast=True. 
""" if isinstance(serverparams, list): @@ -208,6 +215,11 @@ def __init__( self.serverparams = [serverparams] self.adapter = adapter + self.fail_fast = fail_fast + self.on_connection_error = on_connection_error + + # Track failed connections for transparency + self.failed_connections: list[tuple[Any, Exception]] = [] # session and tools get set by the async loop during initialization. self.sessions: list[ClientSession] = [] @@ -229,13 +241,32 @@ def _run_loop(self): async def setup(): async with AsyncExitStack() as stack: - connections = [ - await stack.enter_async_context( - mcptools(params, self.client_session_timeout_seconds) - ) - for params in self.serverparams - ] - self.sessions, self.mcp_tools = [list(c) for c in zip(*connections)] + connections = [] + + # Try to connect to each server individually for better fault tolerance + for params in self.serverparams: + try: + connection = await stack.enter_async_context( + mcptools(params, self.client_session_timeout_seconds) + ) + connections.append(connection) + except Exception as e: + self.failed_connections.append((params, e)) + error_msg = f"Failed to connect to MCP server {params}: {e}" + + if self.on_connection_error: + self.on_connection_error(params, e) + + if self.fail_fast: + raise + else: + pass + + if not connections and not self.fail_fast: + self.sessions, self.mcp_tools = [], [] + elif connections: + self.sessions, self.mcp_tools = [list(c) for c in zip(*connections)] + self.ready.set() # Signal initialization is complete await asyncio.Event().wait() # Keep session alive until stopped @@ -257,9 +288,13 @@ def tools(self) -> list[Any]: see :meth:`atools`. 
""" - if not self.sessions: + if not self.sessions and not self.failed_connections: raise RuntimeError("Session not initialized") + if not self.sessions: + # Only failed connections, no successful ones + return [] + def _sync_call_tool( session, name: str, arguments: dict | None = None ) -> mcp.types.CallToolResult: @@ -296,9 +331,11 @@ def start(self): # check connection to mcp server is ready if not self.ready.wait(timeout=self.connect_timeout): raise TimeoutError( - f"Couldn't connect to the MCP server after {self.connect_timeout} seconds" + f"Couldn't initialize MCP connections after {self.connect_timeout} seconds" ) + pass + def close(self): """Clean up resources and stop the client.""" if self.task and not self.task.done(): @@ -337,14 +374,30 @@ async def atools(self) -> list[Any]: async def __aenter__(self) -> list[Any]: self._ctxmanager = AsyncExitStack() - connections = [ - await self._ctxmanager.enter_async_context( - mcptools(params, self.client_session_timeout_seconds) - ) - for params in self.serverparams - ] - - self.sessions, self.mcp_tools = [list(c) for c in zip(*connections)] + connections = [] + + # Try to connect to each server individually for better fault tolerance + for params in self.serverparams: + try: + connection = await self._ctxmanager.enter_async_context( + mcptools(params, self.client_session_timeout_seconds) + ) + connections.append(connection) + except Exception as e: + self.failed_connections.append((params, e)) + + if self.on_connection_error: + self.on_connection_error(params, e) + + if self.fail_fast: + raise + else: + pass + + if not connections and not self.fail_fast: + self.sessions, self.mcp_tools = [], [] + elif connections: + self.sessions, self.mcp_tools = [list(c) for c in zip(*connections)] return await self.atools() From 0c6c1f199c4da8ce7fa87333ded23f5ff288e64b Mon Sep 17 00:00:00 2001 From: Edison-A-N Date: Wed, 22 Oct 2025 15:23:09 +0800 Subject: [PATCH 2/4] fix: set fail_fast default to True for backward 
compatibility --- src/mcpadapt/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mcpadapt/core.py b/src/mcpadapt/core.py index d0ee1b3..8ea238a 100644 --- a/src/mcpadapt/core.py +++ b/src/mcpadapt/core.py @@ -187,7 +187,7 @@ def __init__( adapter: ToolAdapter, connect_timeout: int = 30, client_session_timeout_seconds: float | timedelta | None = 5, - fail_fast: bool = False, + fail_fast: bool = True, on_connection_error: Callable[[Any, Exception], None] | None = None, ): """ @@ -201,7 +201,7 @@ def __init__( client_session_timeout_seconds: Timeout for MCP ClientSession calls fail_fast (bool): If True, any connection failure will cause the entire adapter to fail. If False, failed connections are skipped and only successful connections are used. - Default is False for better fault tolerance. + Default is True to maintain backward compatibility. on_connection_error: Optional callback function called when a connection fails. Receives (server_params, exception) as arguments. 
From a17a7cecd95aef5bfff093184be292b8122ba0e7 Mon Sep 17 00:00:00 2001 From: Edison-A-N Date: Thu, 23 Oct 2025 10:09:30 +0800 Subject: [PATCH 3/4] fix: restore error message for backward compatibility --- src/mcpadapt/core.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/mcpadapt/core.py b/src/mcpadapt/core.py index 8ea238a..7087929 100644 --- a/src/mcpadapt/core.py +++ b/src/mcpadapt/core.py @@ -252,7 +252,6 @@ async def setup(): connections.append(connection) except Exception as e: self.failed_connections.append((params, e)) - error_msg = f"Failed to connect to MCP server {params}: {e}" if self.on_connection_error: self.on_connection_error(params, e) @@ -331,11 +330,9 @@ def start(self): # check connection to mcp server is ready if not self.ready.wait(timeout=self.connect_timeout): raise TimeoutError( - f"Couldn't initialize MCP connections after {self.connect_timeout} seconds" + f"Couldn't connect to the MCP server after {self.connect_timeout} seconds" ) - pass - def close(self): """Clean up resources and stop the client.""" if self.task and not self.task.done(): From faf2f630382f5b8bd4db9997ea46386e42fc1dce Mon Sep 17 00:00:00 2001 From: Edison-A-N Date: Thu, 23 Oct 2025 10:09:40 +0800 Subject: [PATCH 4/4] test: add fault tolerance test cases --- tests/test_core_fault_tolerance.py | 388 +++++++++++++++++++++++++++++ 1 file changed, 388 insertions(+) create mode 100644 tests/test_core_fault_tolerance.py diff --git a/tests/test_core_fault_tolerance.py b/tests/test_core_fault_tolerance.py new file mode 100644 index 0000000..42070b8 --- /dev/null +++ b/tests/test_core_fault_tolerance.py @@ -0,0 +1,388 @@ +""" +Test cases for fault tolerance features in MCPAdapt. + +This module tests the fail_fast, on_connection_error, and failed_connections +tracking features added to improve fault tolerance when connecting to multiple +MCP servers. 
+""" + +from textwrap import dedent +from typing import Any, Callable, Coroutine + +import mcp +import pytest +from mcp import StdioServerParameters + +from mcpadapt.core import MCPAdapt, ToolAdapter + + +class DummyAdapter(ToolAdapter): + """A dummy adapter that returns the function as is""" + + def adapt( + self, + func: Callable[[dict | None], mcp.types.CallToolResult], + mcp_tool: mcp.types.Tool, + ): + return func + + def async_adapt( + self, + afunc: Callable[[dict | None], Coroutine[Any, Any, mcp.types.CallToolResult]], + mcp_tool: mcp.types.Tool, + ): + return afunc + + +@pytest.fixture +def echo_server_script(): + return dedent( + ''' + from mcp.server.fastmcp import FastMCP + + mcp = FastMCP("Echo Server") + + @mcp.tool() + def echo_tool(text: str) -> str: + """Echo the input text""" + return f"Echo: {text}" + + mcp.run() + ''' + ) + + +@pytest.fixture +def failing_server_script(): + """A script that fails to start""" + return dedent( + """ + import sys + # Exit immediately to simulate a failing server + sys.exit(1) + """ + ) + + +# ========== Synchronous Tests ========== + + +def test_fail_fast_default_behavior(failing_server_script): + """Test that fail_fast=True (default) raises exception when server fails""" + with pytest.raises(Exception): + with MCPAdapt( + StdioServerParameters( + command="uv", args=["run", "python", "-c", failing_server_script] + ), + DummyAdapter(), + ): + pass + + +def test_fail_fast_true_explicit(failing_server_script): + """Test that fail_fast=True explicitly set raises exception when server fails""" + with pytest.raises(Exception): + with MCPAdapt( + StdioServerParameters( + command="uv", args=["run", "python", "-c", failing_server_script] + ), + DummyAdapter(), + fail_fast=True, + ): + pass + + +def test_fail_fast_false_single_failing_server(failing_server_script): + """Test that fail_fast=False with single failing server returns empty tools""" + with MCPAdapt( + StdioServerParameters( + command="uv", args=["run", "python", 
"-c", failing_server_script] + ), + DummyAdapter(), + fail_fast=False, + ) as tools: + assert len(tools) == 0 + + +def test_fail_fast_false_mixed_servers(echo_server_script, failing_server_script): + """Test that fail_fast=False skips failing server and uses successful one""" + with MCPAdapt( + [ + StdioServerParameters( + command="uv", args=["run", "python", "-c", failing_server_script] + ), + StdioServerParameters( + command="uv", args=["run", "python", "-c", echo_server_script] + ), + ], + DummyAdapter(), + fail_fast=False, + ) as tools: + # Should only have tools from the successful server + assert len(tools) == 1 + assert tools[0]({"text": "hello"}).content[0].text == "Echo: hello" + + +def test_fail_fast_false_multiple_mixed_servers( + echo_server_script, failing_server_script +): + """Test that fail_fast=False works with multiple failing and successful servers""" + with MCPAdapt( + [ + StdioServerParameters( + command="uv", args=["run", "python", "-c", failing_server_script] + ), + StdioServerParameters( + command="uv", args=["run", "python", "-c", echo_server_script] + ), + StdioServerParameters( + command="uv", args=["run", "python", "-c", failing_server_script] + ), + StdioServerParameters( + command="uv", args=["run", "python", "-c", echo_server_script] + ), + ], + DummyAdapter(), + fail_fast=False, + ) as tools: + # Should have tools from the 2 successful servers + assert len(tools) == 2 + assert tools[0]({"text": "hello"}).content[0].text == "Echo: hello" + assert tools[1]({"text": "world"}).content[0].text == "Echo: world" + + +def test_on_connection_error_callback(failing_server_script): + """Test that on_connection_error callback is called when connection fails""" + callback_invocations = [] + + def error_callback(server_params, exception): + callback_invocations.append((server_params, exception)) + + with MCPAdapt( + StdioServerParameters( + command="uv", args=["run", "python", "-c", failing_server_script] + ), + DummyAdapter(), + fail_fast=False, 
+ on_connection_error=error_callback, + ): + pass + + # Callback should have been called once + assert len(callback_invocations) == 1 + server_params, exception = callback_invocations[0] + assert isinstance(server_params, StdioServerParameters) + assert isinstance(exception, Exception) + + +def test_on_connection_error_callback_multiple_failures( + echo_server_script, failing_server_script +): + """Test that on_connection_error is called for each failed connection""" + callback_invocations = [] + + def error_callback(server_params, exception): + callback_invocations.append((server_params, exception)) + + with MCPAdapt( + [ + StdioServerParameters( + command="uv", args=["run", "python", "-c", failing_server_script] + ), + StdioServerParameters( + command="uv", args=["run", "python", "-c", echo_server_script] + ), + StdioServerParameters( + command="uv", args=["run", "python", "-c", failing_server_script] + ), + ], + DummyAdapter(), + fail_fast=False, + on_connection_error=error_callback, + ) as tools: + assert len(tools) == 1 + + # Callback should have been called twice (for two failing servers) + assert len(callback_invocations) == 2 + for server_params, exception in callback_invocations: + assert isinstance(server_params, StdioServerParameters) + assert isinstance(exception, Exception) + + +def test_failed_connections_tracking(failing_server_script): + """Test that failed_connections list tracks failed connections""" + adapter = MCPAdapt( + StdioServerParameters( + command="uv", args=["run", "python", "-c", failing_server_script] + ), + DummyAdapter(), + fail_fast=False, + ) + + with adapter: + pass + + # Should have one failed connection tracked + assert len(adapter.failed_connections) == 1 + server_params, exception = adapter.failed_connections[0] + assert isinstance(server_params, StdioServerParameters) + assert isinstance(exception, Exception) + + +def test_failed_connections_tracking_mixed(echo_server_script, failing_server_script): + """Test that 
failed_connections tracks only failed connections in mixed scenario""" + adapter = MCPAdapt( + [ + StdioServerParameters( + command="uv", args=["run", "python", "-c", failing_server_script] + ), + StdioServerParameters( + command="uv", args=["run", "python", "-c", echo_server_script] + ), + StdioServerParameters( + command="uv", args=["run", "python", "-c", failing_server_script] + ), + ], + DummyAdapter(), + fail_fast=False, + ) + + with adapter as tools: + assert len(tools) == 1 + + # Should have two failed connections tracked + assert len(adapter.failed_connections) == 2 + for server_params, exception in adapter.failed_connections: + assert isinstance(server_params, StdioServerParameters) + assert isinstance(exception, Exception) + + +# ========== Asynchronous Tests ========== + + +async def test_fail_fast_false_async_mixed_servers( + echo_server_script, failing_server_script +): + """Test async context manager with fail_fast=False and mixed servers""" + async with MCPAdapt( + [ + StdioServerParameters( + command="uv", args=["run", "python", "-c", failing_server_script] + ), + StdioServerParameters( + command="uv", args=["run", "python", "-c", echo_server_script] + ), + ], + DummyAdapter(), + fail_fast=False, + ) as tools: + # Should only have tools from the successful server + assert len(tools) == 1 + result = await tools[0]({"text": "hello"}) + assert result.content[0].text == "Echo: hello" + + +async def test_fail_fast_false_async_single_failing_server(failing_server_script): + """Test async context manager with fail_fast=False and single failing server""" + async with MCPAdapt( + StdioServerParameters( + command="uv", args=["run", "python", "-c", failing_server_script] + ), + DummyAdapter(), + fail_fast=False, + ) as tools: + assert len(tools) == 0 + + +async def test_fail_fast_true_async_with_failure(failing_server_script): + """Test async context manager with fail_fast=True raises exception""" + with pytest.raises(Exception): + async with MCPAdapt( + 
StdioServerParameters( + command="uv", args=["run", "python", "-c", failing_server_script] + ), + DummyAdapter(), + fail_fast=True, + ): + pass + + +async def test_on_connection_error_callback_async(failing_server_script): + """Test that on_connection_error callback works in async context""" + callback_invocations = [] + + def error_callback(server_params, exception): + callback_invocations.append((server_params, exception)) + + async with MCPAdapt( + StdioServerParameters( + command="uv", args=["run", "python", "-c", failing_server_script] + ), + DummyAdapter(), + fail_fast=False, + on_connection_error=error_callback, + ): + pass + + # Callback should have been called once + assert len(callback_invocations) == 1 + server_params, exception = callback_invocations[0] + assert isinstance(server_params, StdioServerParameters) + assert isinstance(exception, Exception) + + +async def test_failed_connections_tracking_async( + echo_server_script, failing_server_script +): + """Test that failed_connections tracking works in async context""" + adapter = MCPAdapt( + [ + StdioServerParameters( + command="uv", args=["run", "python", "-c", failing_server_script] + ), + StdioServerParameters( + command="uv", args=["run", "python", "-c", echo_server_script] + ), + ], + DummyAdapter(), + fail_fast=False, + ) + + async with adapter as tools: + assert len(tools) == 1 + + # Should have one failed connection tracked + assert len(adapter.failed_connections) == 1 + server_params, exception = adapter.failed_connections[0] + assert isinstance(server_params, StdioServerParameters) + assert isinstance(exception, Exception) + + +async def test_failed_connections_tracking_async_multiple( + echo_server_script, failing_server_script +): + """Test that failed_connections tracking works with multiple failures in async context""" + adapter = MCPAdapt( + [ + StdioServerParameters( + command="uv", args=["run", "python", "-c", failing_server_script] + ), + StdioServerParameters( + command="uv", 
args=["run", "python", "-c", echo_server_script] + ), + StdioServerParameters( + command="uv", args=["run", "python", "-c", failing_server_script] + ), + ], + DummyAdapter(), + fail_fast=False, + ) + + async with adapter as tools: + assert len(tools) == 1 + + # Should have two failed connections tracked + assert len(adapter.failed_connections) == 2 + for server_params, exception in adapter.failed_connections: + assert isinstance(server_params, StdioServerParameters) + assert isinstance(exception, Exception)