diff --git a/python/semantic_kernel/schema/kernel_json_schema_builder.py b/python/semantic_kernel/schema/kernel_json_schema_builder.py
index 34649c8a361f..64ef6f467405 100644
--- a/python/semantic_kernel/schema/kernel_json_schema_builder.py
+++ b/python/semantic_kernel/schema/kernel_json_schema_builder.py
@@ -1,6 +1,7 @@
 # Copyright (c) Microsoft. All rights reserved.
 
-from typing import Any, get_type_hints
+from types import UnionType
+from typing import Any, Union, get_args, get_origin, get_type_hints
 
 from semantic_kernel.kernel_pydantic import KernelBaseModel
 
@@ -11,12 +12,16 @@
     float: "number",
     list: "array",
     dict: "object",
+    set: "array",
+    tuple: "array",
     "int": "integer",
     "str": "string",
     "bool": "boolean",
     "float": "number",
     "list": "array",
     "dict": "object",
+    "set": "array",
+    "tuple": "array",
     "object": "object",
     "array": "array",
 }
@@ -26,13 +31,24 @@ class KernelJsonSchemaBuilder:
 
     @classmethod
     def build(cls, parameter_type: type | str, description: str | None = None) -> dict[str, Any]:
-        """Builds JSON schema for a given parameter type."""
+        """Builds the JSON schema for a given parameter type and description.
+
+        Args:
+            parameter_type (type | str): The parameter type.
+            description (str, optional): The description of the parameter. Defaults to None.
+
+        Returns:
+            dict[str, Any]: The JSON schema for the parameter type.
+        """
         if isinstance(parameter_type, str):
             return cls.build_from_type_name(parameter_type, description)
-        if issubclass(parameter_type, KernelBaseModel):
+        if isinstance(parameter_type, KernelBaseModel):
             return cls.build_model_schema(parameter_type, description)
         if hasattr(parameter_type, "__annotations__"):
             return cls.build_model_schema(parameter_type, description)
+        # Parameterized generics and unions (list[int], Optional[str], int | None) expose __args__.
+        if hasattr(parameter_type, "__args__"):
+            return cls.handle_complex_type(parameter_type, description)
         else:
             schema = cls.get_json_schema(parameter_type)
             if description:
@@ -41,9 +57,20 @@ def build(cls, parameter_type: type | str, description: str | None = None) -> di
 
     @classmethod
     def build_model_schema(cls, model: type, description: str | None = None) -> dict[str, Any]:
-        """Builds JSON schema for a given model."""
+        """Builds the JSON schema for a given model and description.
+
+        Args:
+            model (type): The model type.
+            description (str, optional): The description of the model. Defaults to None.
+
+        Returns:
+            dict[str, Any]: The JSON schema for the model.
+        """
         properties = {}
-        for field_name, field_type in get_type_hints(model).items():
+        # TODO: forward references are not currently tested. Leave globalns/localns unset so that
+        # get_type_hints resolves string annotations in the model's own module, not in this one.
+        hints = get_type_hints(model)
+        for field_name, field_type in hints.items():
             field_description = None
             if hasattr(model, "__fields__") and field_name in model.__fields__:
                 field_info = model.__fields__[field_name]
@@ -59,7 +86,15 @@ def build_model_schema(cls, model: type, description: str | None = None) -> dict
 
     @classmethod
     def build_from_type_name(cls, parameter_type: str, description: str | None = None) -> dict[str, Any]:
-        """Builds JSON schema for a given parameter type name."""
+        """Builds the JSON schema for a given parameter type name and description.
+
+        Args:
+            parameter_type (str): The parameter type name.
+            description (str, optional): The description of the parameter. Defaults to None.
+
+        Returns:
+            dict[str, Any]: The JSON schema for the parameter type.
+        """
         type_name = TYPE_MAPPING.get(parameter_type, "object")
         schema = {"type": type_name}
         if description:
@@ -69,7 +104,64 @@ def build_from_type_name(cls, parameter_type: str, description: str | None = Non
 
     @classmethod
     def get_json_schema(cls, parameter_type: type) -> dict[str, Any]:
-        """Gets JSON schema for a given parameter type."""
+        """Gets JSON schema for a given parameter type.
+
+        Args:
+            parameter_type (type): The parameter type.
+
+        Returns:
+            dict[str, Any]: The JSON schema for the parameter type.
+        """
         type_name = TYPE_MAPPING.get(parameter_type, "object")
         schema = {"type": type_name}
         return schema
+
+    @classmethod
+    def handle_complex_type(cls, parameter_type: type, description: str | None = None) -> dict[str, Any]:
+        """Handles building the JSON schema for complex types.
+
+        Supported origins are list/set (array with an item schema), dict (object whose
+        additionalProperties follow the value type), tuple (array with one schema per position)
+        and unions, including the PEP 604 ``X | Y`` form. A two-member union with None becomes
+        the schema of the other member marked ``nullable``; other unions become ``anyOf``.
+
+        Note that the container and ``anyOf`` schemas always carry a "description" key (None when
+        no description is given), while the nullable branch only sets it for a truthy description.
+
+        Args:
+            parameter_type (type): The parameter type.
+            description (str, optional): The description of the parameter. Defaults to None.
+
+        Returns:
+            dict[str, Any]: The JSON schema for the parameter type.
+        """
+        origin = get_origin(parameter_type)
+        args = get_args(parameter_type)
+
+        if origin is list or origin is set:
+            item_type = args[0]
+            return {"type": "array", "items": cls.build(item_type), "description": description}
+        if origin is dict:
+            _, value_type = args
+            additional_properties = cls.build(value_type)
+            if additional_properties == {"type": "object"}:
+                additional_properties["properties"] = {}  # Account for differences in Python 3.10 dict
+            return {"type": "object", "additionalProperties": additional_properties, "description": description}
+        if origin is tuple:
+            items = [cls.build(arg) for arg in args]
+            return {"type": "array", "items": items, "description": description}
+        # get_origin() reports types.UnionType for PEP 604 unions (int | str), typing.Union otherwise.
+        if origin is Union or origin is UnionType:
+            # Handle Optional[T] (Union[T, None]) by making schema nullable
+            if len(args) == 2 and type(None) in args:
+                non_none_type = args[0] if args[1] is type(None) else args[1]
+                schema = cls.build(non_none_type)
+                schema["nullable"] = True
+                if description:
+                    schema["description"] = description
+                return schema
+            else:
+                schemas = [cls.build(arg) for arg in args]
+                return {"anyOf": schemas, "description": description}
+        else:
+            return cls.get_json_schema(parameter_type)
diff --git a/python/tests/unit/schema/test_schema_builder.py b/python/tests/unit/schema/test_schema_builder.py
index f6275af1cb2f..ebc503ce1d48 100644
--- a/python/tests/unit/schema/test_schema_builder.py
+++ b/python/tests/unit/schema/test_schema_builder.py
@@ -1,5 +1,10 @@
 # Copyright (c) Microsoft. All rights reserved.
 
+import json
+from typing import Any, Optional, Union
+from unittest.mock import Mock
+
+import pytest
 from semantic_kernel.kernel_pydantic import KernelBaseModel
 from semantic_kernel.schema.kernel_json_schema_builder import KernelJsonSchemaBuilder
 
@@ -15,6 +20,39 @@ class AnotherModel:
     score: float
 
 
+class MockClass:
+    """Plain annotated class used as the item type in the list[MockClass] test."""
+
+    name: str = None
+    age: int = None
+
+
+class MockModel:
+    """Stand-in model with hand-written __annotations__ and Mock-based __fields__ (no entry for coordinates)."""
+
+    __annotations__ = {
+        "id": int,
+        "name": str,
+        "is_active": bool,
+        "scores": list[int],
+        "metadata": dict[str, Any],
+        "tags": set[str],
+        "coordinates": tuple[int, int],
+        "status": Union[int, str],
+        "optional_field": Optional[str],
+    }
+    __fields__ = {
+        "id": Mock(description="The ID of the model"),
+        "name": Mock(description="The name of the model"),
+        "is_active": Mock(description="Whether the model is active"),
+        "tags": Mock(description="Tags associated with the model"),
+        "status": Mock(description="The status of the model, either as an integer or a string"),
+        "scores": Mock(description="The scores associated with the model"),
+        "optional_field": Mock(description="An optional field that can be null"),
+        "metadata": Mock(description="The optional metadata description"),
+    }
+
+
 def test_build_with_kernel_base_model():
     expected_schema = {"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "integer"}}}
     result = KernelJsonSchemaBuilder.build(ExampleModel)
@@ -71,3 +109,147 @@ def test_get_json_schema():
     expected_schema = {"type": "integer"}
     result = KernelJsonSchemaBuilder.get_json_schema(int)
     assert result == expected_schema
+
+
+def test_build_list():
+    schema = KernelJsonSchemaBuilder.build(list[str])
+    assert schema == {"type": "array", "items": {"type": "string"}, "description": None}
+
+
+def test_build_list_complex_type():
+    schema = KernelJsonSchemaBuilder.build(list[MockClass])
+    assert schema == {
+        "type": "array",
+        "items": {
+            "type": "object",
+            "properties": {
+                "name": {"type": "string"},
+                "age": {"type": "integer"},
+            },
+        },
+        "description": None,
+    }
+
+
+def test_build_dict():
+    schema = KernelJsonSchemaBuilder.build(dict[str, int])
+    assert schema == {"type": "object", "additionalProperties": {"type": "integer"}, "description": None}
+
+
+def test_build_set():
+    schema = KernelJsonSchemaBuilder.build(set[int])
+    assert schema == {"type": "array", "items": {"type": "integer"}, "description": None}
+
+
+def test_build_tuple():
+    schema = KernelJsonSchemaBuilder.build(tuple[int, str])
+    assert schema == {"type": "array", "items": [{"type": "integer"}, {"type": "string"}], "description": None}
+
+
+def test_build_union():
+    schema = KernelJsonSchemaBuilder.build(Union[int, str])
+    assert schema == {"anyOf": [{"type": "integer"}, {"type": "string"}], "description": None}
+
+
+def test_build_optional():
+    schema = KernelJsonSchemaBuilder.build(Optional[int])
+    assert schema == {"type": "integer", "nullable": True}
+
+
+def test_build_model_schema_for_many_types():
+    schema = KernelJsonSchemaBuilder.build(MockModel)
+    expected = """
+{
+    "type": "object",
+    "properties": {
+        "id": {
+            "type": "integer",
+            "description": "The ID of the model"
+        },
+        "name": {
+            "type": "string",
+            "description": "The name of the model"
+        },
+        "is_active": {
+            "type": "boolean",
+            "description": "Whether the model is active"
+        },
+        "scores": {
+            "type": "array",
+            "items": {"type": "integer"},
+            "description": "The scores associated with the model"
+        },
+        "metadata": {
+            "type": "object",
+            "additionalProperties": {
+                "type": "object",
+                "properties": {}
+            },
+            "description": "The optional metadata description"
+        },
+        "tags": {
+            "type": "array",
+            "items": {"type": "string"},
+            "description": "Tags associated with the model"
+        },
+        "coordinates": {
+            "type": "array",
+            "items": [
+                {"type": "integer"},
+                {"type": "integer"}
+            ],
+            "description": null
+        },
+        "status": {
+            "anyOf": [
+                {"type": "integer"},
+                {"type": "string"}
+            ],
+            "description": "The status of the model, either as an integer or a string"
+        },
+        "optional_field": {
+            "type": "string",
+            "nullable": true,
+            "description": "An optional field that can be null"
+        }
+    }
+}
+"""
+    expected_schema = json.loads(expected)
+    assert schema == expected_schema
+
+
+@pytest.mark.parametrize(
+    "type_name, expected",
+    [
+        ("int", {"type": "integer"}),
+        ("str", {"type": "string"}),
+        ("bool", {"type": "boolean"}),
+        ("float", {"type": "number"}),
+        ("list", {"type": "array"}),
+        ("dict", {"type": "object"}),
+        ("set", {"type": "array"}),
+        ("tuple", {"type": "array"}),
+        ("object", {"type": "object"}),
+        ("array", {"type": "array"}),
+    ],
+)
+def test_build_from_many_type_names(type_name, expected):
+    assert KernelJsonSchemaBuilder.build_from_type_name(type_name) == expected
+
+
+@pytest.mark.parametrize(
+    "type_obj, expected",
+    [
+        (int, {"type": "integer"}),
+        (str, {"type": "string"}),
+        (bool, {"type": "boolean"}),
+        (float, {"type": "number"}),
+        (list, {"type": "array"}),
+        (dict, {"type": "object"}),
+        (set, {"type": "array"}),
+        (tuple, {"type": "array"}),
+    ],
+)
+def test_get_json_schema_multiple(type_obj, expected):
+    assert KernelJsonSchemaBuilder.get_json_schema(type_obj) == expected