-
Notifications
You must be signed in to change notification settings - Fork 2k
Dereference $ref in tool schemas for MCP client compatibility #2814
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5806f04
73ae9e3
d8b95a0
3dea366
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,8 +10,11 @@ def dereference_refs(schema: dict[str, Any]) -> dict[str, Any]: | |
| """Resolve all $ref references in a JSON schema by inlining definitions. | ||
|
|
||
| This function resolves $ref references that point to $defs, replacing them | ||
| with the actual definition content. This is necessary because some MCP clients | ||
| (e.g., VS Code Copilot) don't properly handle $ref in tool input schemas. | ||
| with the actual definition content while preserving sibling keywords (like | ||
| description, default, examples) that Pydantic places alongside $ref. | ||
|
|
||
| This is necessary because some MCP clients (e.g., VS Code Copilot) don't | ||
| properly handle $ref in tool input schemas. | ||
|
|
||
| For self-referencing/circular schemas where full dereferencing is not possible, | ||
| this function falls back to resolving only the root-level $ref while preserving | ||
|
|
@@ -27,19 +30,27 @@ def dereference_refs(schema: dict[str, Any]) -> dict[str, Any]: | |
| Example: | ||
| >>> schema = { | ||
| ... "$defs": {"Category": {"enum": ["a", "b"], "type": "string"}}, | ||
| ... "properties": {"cat": {"$ref": "#/$defs/Category"}} | ||
| ... "properties": {"cat": {"$ref": "#/$defs/Category", "default": "a"}} | ||
| ... } | ||
| >>> resolved = dereference_refs(schema) | ||
| >>> # Result: {"properties": {"cat": {"enum": ["a", "b"], "type": "string"}}} | ||
| >>> # Result: {"properties": {"cat": {"enum": ["a", "b"], "type": "string", "default": "a"}}} | ||
| """ | ||
| try: | ||
| # Use jsonref to resolve all $ref references | ||
| # proxies=False returns plain dicts (not proxy objects) | ||
| # lazy_load=False resolves immediately | ||
| dereferenced = replace_refs(schema, proxies=False, lazy_load=False) | ||
|
|
||
| # Merge sibling keywords that were lost during dereferencing | ||
| # Pydantic puts description, default, examples as siblings to $ref | ||
| defs = schema.get("$defs", {}) | ||
| merged = _merge_ref_siblings(schema, dereferenced, defs) | ||
| # Type assertion: top-level schema is always a dict | ||
| assert isinstance(merged, dict) | ||
| dereferenced = merged | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| # Remove $defs since all references have been resolved | ||
| if isinstance(dereferenced, dict) and "$defs" in dereferenced: | ||
| if "$defs" in dereferenced: | ||
| dereferenced = {k: v for k, v in dereferenced.items() if k != "$defs"} | ||
|
|
||
| return dereferenced | ||
|
|
@@ -50,6 +61,73 @@ def dereference_refs(schema: dict[str, Any]) -> dict[str, Any]: | |
| return resolve_root_ref(schema) | ||
|
|
||
|
|
||
| def _merge_ref_siblings( | ||
| original: Any, | ||
| dereferenced: Any, | ||
| defs: dict[str, Any], | ||
| visited: set[str] | None = None, | ||
| ) -> Any: | ||
| """Merge sibling keywords from original $ref nodes into dereferenced schema. | ||
|
|
||
| When jsonref resolves $ref, it replaces the entire node with the referenced | ||
| definition, losing any sibling keywords like description, default, or examples. | ||
| This function walks both trees in parallel and merges those siblings back. | ||
|
|
||
| Args: | ||
| original: The original schema with $ref and potential siblings | ||
| dereferenced: The schema after jsonref processing | ||
| defs: The $defs from the original schema, for looking up referenced definitions | ||
| visited: Set of definition names already being processed (prevents cycles) | ||
|
|
||
| Returns: | ||
| The dereferenced schema with sibling keywords restored | ||
| """ | ||
| if visited is None: | ||
| visited = set() | ||
|
|
||
| if isinstance(original, dict) and isinstance(dereferenced, dict): | ||
| # Check if original had a $ref | ||
| if "$ref" in original: | ||
| ref = original["$ref"] | ||
| siblings = {k: v for k, v in original.items() if k not in ("$ref", "$defs")} | ||
|
|
||
| # Look up the referenced definition to process its nested siblings | ||
| if isinstance(ref, str) and ref.startswith("#/$defs/"): | ||
| def_name = ref.split("/")[-1] | ||
| # Prevent infinite recursion on circular references | ||
| if def_name in defs and def_name not in visited: | ||
| # Recursively process the definition's content for nested siblings | ||
| dereferenced = _merge_ref_siblings( | ||
| defs[def_name], dereferenced, defs, visited | {def_name} | ||
| ) | ||
|
|
||
| if siblings: | ||
| # Merge local siblings, which take precedence | ||
| merged = dict(dereferenced) | ||
| merged.update(siblings) | ||
| return merged | ||
| return dereferenced | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| # Recurse into nested structures | ||
| result = {} | ||
| for key, value in dereferenced.items(): | ||
| if key in original: | ||
| result[key] = _merge_ref_siblings(original[key], value, defs, visited) | ||
| else: | ||
| result[key] = value | ||
| return result | ||
|
|
||
| elif isinstance(original, list) and isinstance(dereferenced, list): | ||
| # Process list items in parallel | ||
| min_len = min(len(original), len(dereferenced)) | ||
| return [ | ||
| _merge_ref_siblings(o, d, defs, visited) | ||
| for o, d in zip(original[:min_len], dereferenced[:min_len], strict=False) | ||
| ] + dereferenced[min_len:] | ||
|
|
||
| return dereferenced | ||
|
|
||
|
Comment on lines
+64
to
+129
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: find . -type f -name "*test*.py" | head -20Repository: jlowin/fastmcp Length of output: 135 🏁 Script executed: rg -l "json_schema|_merge_ref_siblings|dereference_refs" --type=py | grep -i testRepository: jlowin/fastmcp Length of output: 40 🏁 Script executed: rg -n "_merge_ref_siblings" --type=pyRepository: jlowin/fastmcp Length of output: 440 🏁 Script executed: cat -n ./src/fastmcp/utilities/tests.pyRepository: jlowin/fastmcp Length of output: 11290 🏁 Script executed: rg -n "json_schema|dereference" ./src/fastmcp/utilities/tests.py -A 5 -B 2Repository: jlowin/fastmcp Length of output: 40 🏁 Script executed: rg -n "def test" ./src/fastmcp/utilities/tests.pyRepository: jlowin/fastmcp Length of output: 104 🏁 Script executed: find . -type f -name "*test_json*" -o -name "*json*test*" | grep -E '\.py$'Repository: jlowin/fastmcp Length of output: 40 🏁 Script executed: rg -l "dereference_refs" --type=pyRepository: jlowin/fastmcp Length of output: 94 🏁 Script executed: find . -path "*/tests/*" -name "*.py" -type f | grep -E "(json|schema)" | head -20Repository: jlowin/fastmcp Length of output: 40 🏁 Script executed: find . -path "*/test*" -name "*.py" -type f | head -30Repository: jlowin/fastmcp Length of output: 169 🏁 Script executed: rg -n "dereference_refs|_merge_ref_siblings|json_schema" --type=py -l | grep -i testRepository: jlowin/fastmcp Length of output: 40 🏁 Script executed: cat -n src/fastmcp/utilities/json_schema.py | head -60Repository: jlowin/fastmcp Length of output: 2907 🏁 Script executed: rg -n "import.*json_schema|from.*json_schema" --type=pyRepository: jlowin/fastmcp Length of output: 1341 🏁 Script executed: find . 
-type d -name tests | xargs -I {} find {} -name "*.py" -type fRepository: jlowin/fastmcp Length of output: 102 🏁 Script executed: rg -n "json_schema|dereference_refs|_merge_ref_siblings" ./examples/testing_demo/tests/test_server.py -A 3 -B 2Repository: jlowin/fastmcp Length of output: 40 🏁 Script executed: cat -n ./examples/testing_demo/tests/test_server.py | head -100Repository: jlowin/fastmcp Length of output: 3863 🏁 Script executed: find . -type f -name "conftest.py" -o -type f -path "*/tests/test_*.py" | head -20Repository: jlowin/fastmcp Length of output: 102 🏁 Script executed: find . -type f -name "*.py" -path "*/test*" | xargs grep -l "compress_schema\|dereference" 2>/dev/nullRepository: jlowin/fastmcp Length of output: 42 🏁 Script executed: rg -r "def compress_schema|def dereference_refs" --type=py -A 2Repository: jlowin/fastmcp Length of output: 454 🏁 Script executed: find . -type f -name "test_*.py" -o -type f -name "*_test.py" | wc -lRepository: jlowin/fastmcp Length of output: 59 🏁 Script executed: cat -n src/fastmcp/utilities/json_schema.pyRepository: jlowin/fastmcp Length of output: 18076 Add comprehensive test coverage for the dereference/merge logic. The
|
||
|
|
||
| def resolve_root_ref(schema: dict[str, Any]) -> dict[str, Any]: | ||
| """Resolve $ref at root level to meet MCP spec requirements. | ||
|
|
||
|
|
@@ -89,7 +167,7 @@ def resolve_root_ref(schema: dict[str, Any]) -> dict[str, Any]: | |
| return schema | ||
|
|
||
|
|
||
| def _prune_param(schema: dict, param: str) -> dict: | ||
| def _prune_param(schema: dict[str, Any], param: str) -> dict[str, Any]: | ||
| """Return a new schema with *param* removed from `properties`, `required`, | ||
| and (if no longer referenced) `$defs`. | ||
| """ | ||
|
|
@@ -111,11 +189,11 @@ def _prune_param(schema: dict, param: str) -> dict: | |
|
|
||
|
|
||
| def _single_pass_optimize( | ||
| schema: dict, | ||
| schema: dict[str, Any], | ||
| prune_titles: bool = False, | ||
| prune_additional_properties: bool = False, | ||
| prune_defs: bool = True, | ||
| ) -> dict: | ||
| ) -> dict[str, Any]: | ||
| """ | ||
| Optimize JSON schemas in a single traversal for better performance. | ||
|
|
||
|
|
@@ -284,11 +362,11 @@ def is_def_used(def_name: str, visiting: set[str] | None = None) -> bool: | |
|
|
||
|
|
||
| def compress_schema( | ||
| schema: dict, | ||
| schema: dict[str, Any], | ||
| prune_params: list[str] | None = None, | ||
| prune_additional_properties: bool = True, | ||
| prune_titles: bool = False, | ||
| ) -> dict: | ||
| ) -> dict[str, Any]: | ||
| """ | ||
| Compress and optimize a JSON schema for MCP compatibility. | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`replace_refs` replaces any `{"$ref": ...}` object with the referenced schema, which drops sibling keywords on that node (e.g., `description`, `default`, `examples`, field-level constraints). Pydantic commonly emits those siblings when a field references a model with its own metadata, so dereferencing here can silently strip that metadata from tool schemas and change what clients see. Consider merging the referenced schema with the local node (minus `$ref`) or reapplying local keys after deref so field-level annotations survive inlining. Useful? React with 👍 / 👎.