From ad7fc0a9b12fccf43380ebea50916c9f179bd9f0 Mon Sep 17 00:00:00 2001 From: ckkut001_2 Date: Fri, 5 Sep 2025 10:57:51 +0000 Subject: [PATCH 1/2] Fix: Handle nested objects in array items for JSON schema conversion - Modified _extract_field_type to recursively process object schemas in arrays - Added test case for array items with defined object properties - Fixes issue #6991 --- .../autogen_core/utils/_json_to_pydantic.py | 16 ++ .../tests/test_json_to_pydantic.py | 230 ++++++++++++++++++ 2 files changed, 246 insertions(+) diff --git a/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py b/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py index 0342a49e2b87..5ac79beb47a0 100644 --- a/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py +++ b/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py @@ -128,6 +128,17 @@ def get_ref(self, ref_name: str) -> Any: return self._model_cache[ref_name] + def _get_item_model_name(self, array_field_name: str, parent_model_name: str) -> str: + """Generate hash-based model names for array items to keep names short and unique.""" + import hashlib + + # Create a short hash of the full path to ensure uniqueness + full_path = f"{parent_model_name}_{array_field_name}" + hash_suffix = hashlib.md5(full_path.encode()).hexdigest()[:6] + + # Use field name as-is with hash suffix + return f"{array_field_name}_{hash_suffix}" + def _process_definitions(self, root_schema: Dict[str, Any]) -> None: if "$defs" in root_schema: for model_name in root_schema["$defs"]: @@ -253,6 +264,11 @@ def _extract_field_type(self, key: str, value: Dict[str, Any], model_name: str, item_schema = value.get("items", {"type": "string"}) if "$ref" in item_schema: item_type = self.get_ref(item_schema["$ref"].split("/")[-1]) + elif item_schema.get("type") == "object" and "properties" in item_schema: + # Handle array items that are objects with properties - create a nested model + # Use hash-based naming to keep names short and unique + item_model_name = self._get_item_model_name(key, model_name) + item_type = self._json_schema_to_model(item_schema, item_model_name, root_schema) else: item_type_name = item_schema.get("type") if item_type_name is None: diff --git a/python/packages/autogen-core/tests/test_json_to_pydantic.py b/python/packages/autogen-core/tests/test_json_to_pydantic.py index 0387e228e028..e78667f49053 100644 --- a/python/packages/autogen-core/tests/test_json_to_pydantic.py +++ b/python/packages/autogen-core/tests/test_json_to_pydantic.py @@ -834,3 +834,233 @@ def test_unknown_format_raises() -> None: converter = _JSONSchemaToPydantic() with pytest.raises(FormatNotSupportedError): converter.json_schema_to_pydantic(schema, "UnknownFormatModel") + +def test_array_items_with_object_schema_properties() -> None: + """Test that array items with object schemas create proper Pydantic models.""" + schema = { + "type": "object", + "properties": { + "users": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "email": {"type": "string"}, + "age": {"type": "integer"} + }, + "required": ["name", "email"] + } + } + } + } + + converter = _JSONSchemaToPydantic() + Model = converter.json_schema_to_pydantic(schema, "UserListModel") + + # Verify the users field has correct type annotation + users_field = Model.model_fields["users"] + from typing import get_args, get_origin, Union + + # Extract inner type from Optional[List[...]] + actual_list_type = 
users_field.annotation + if get_origin(users_field.annotation) is Union: + union_args = get_args(users_field.annotation) + for arg in union_args: + if get_origin(arg) is list: + actual_list_type = arg + break + + assert get_origin(actual_list_type) is list + inner_type = get_args(actual_list_type)[0] + + # Verify array items are BaseModel subclasses, not dict + assert inner_type is not dict + assert hasattr(inner_type, 'model_fields') + + # Verify expected fields are present + expected_fields = {"name", "email", "age"} + actual_fields = set(inner_type.model_fields.keys()) + assert expected_fields.issubset(actual_fields) + + # Test instantiation and field access + test_data = { + "users": [ + {"name": "Alice", "email": "alice@example.com", "age": 30}, + {"name": "Bob", "email": "bob@example.com"} + ] + } + + instance = Model(**test_data) + assert len(instance.users) == 2 # type: ignore[attr-defined] + + first_user = instance.users[0] # type: ignore[attr-defined] + assert hasattr(first_user, 'model_fields') + assert not isinstance(first_user, dict) + + # Test attribute access (BaseModel behavior) + assert first_user.name == "Alice" # type: ignore[attr-defined] + assert first_user.email == "alice@example.com" # type: ignore[attr-defined] + assert first_user.age == 30 # type: ignore[attr-defined] + + +def test_nested_arrays_with_object_schemas() -> None: + """Test deeply nested arrays with object schemas create proper Pydantic models.""" + schema = { + "type": "object", + "properties": { + "companies": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "departments": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "employees": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "role": {"type": "string"}, + "skills": { + "type": "array", + "items": {"type": "string"} + } + }, + "required": ["name", "role"] + } + } + }, + "required": ["name"] + } + } + }, + "required": ["name"] + } + } + } + } + + converter = _JSONSchemaToPydantic() + Model = converter.json_schema_to_pydantic(schema, "CompanyListModel") + + # Verify companies field type annotation + companies_field = Model.model_fields["companies"] + from typing import get_args, get_origin, Union + + # Extract companies inner type + actual_list_type = companies_field.annotation + if get_origin(companies_field.annotation) is Union: + union_args = get_args(companies_field.annotation) + for arg in union_args: + if get_origin(arg) is list: + actual_list_type = arg + break + + assert get_origin(actual_list_type) is list + company_type = get_args(actual_list_type)[0] + + # Verify companies are BaseModel subclasses + assert company_type is not dict + assert hasattr(company_type, 'model_fields') + assert "name" in company_type.model_fields + assert "departments" in company_type.model_fields + + # Verify departments field type annotation + departments_field = company_type.model_fields["departments"] + dept_list_type = departments_field.annotation + if get_origin(dept_list_type) is Union: + union_args = get_args(dept_list_type) + for arg in union_args: + if get_origin(arg) is list: + dept_list_type = arg + break + + assert get_origin(dept_list_type) is list + department_type = get_args(dept_list_type)[0] + + # Verify departments are BaseModel subclasses + assert department_type is not dict + assert hasattr(department_type, 'model_fields') + assert "name" in department_type.model_fields + assert 
"employees" in department_type.model_fields + + # Verify employees field type annotation + employees_field = department_type.model_fields["employees"] + emp_list_type = employees_field.annotation + if get_origin(emp_list_type) is Union: + union_args = get_args(emp_list_type) + for arg in union_args: + if get_origin(arg) is list: + emp_list_type = arg + break + + assert get_origin(emp_list_type) is list + employee_type = get_args(emp_list_type)[0] + + # Verify employees are BaseModel subclasses + assert employee_type is not dict + assert hasattr(employee_type, 'model_fields') + expected_emp_fields = {"name", "role", "skills"} + actual_emp_fields = set(employee_type.model_fields.keys()) + assert expected_emp_fields.issubset(actual_emp_fields) + + # Test instantiation with nested data + test_data = { + "companies": [ + { + "name": "TechCorp", + "departments": [ + { + "name": "Engineering", + "employees": [ + { + "name": "Alice", + "role": "Senior Developer", + "skills": ["Python", "JavaScript", "Docker"] + }, + { + "name": "Bob", + "role": "DevOps Engineer", + "skills": ["Kubernetes", "AWS"] + } + ] + }, + { + "name": "Marketing", + "employees": [ + { + "name": "Carol", + "role": "Marketing Manager" + } + ] + } + ] + } + ] + } + + instance = Model(**test_data) + assert len(instance.companies) == 1 # type: ignore[attr-defined] + + company = instance.companies[0] # type: ignore[attr-defined] + assert hasattr(company, 'model_fields') + assert company.name == "TechCorp" # type: ignore[attr-defined] + assert len(company.departments) == 2 # type: ignore[attr-defined] + + engineering_dept = company.departments[0] # type: ignore[attr-defined] + assert hasattr(engineering_dept, 'model_fields') + assert engineering_dept.name == "Engineering" # type: ignore[attr-defined] + assert len(engineering_dept.employees) == 2 # type: ignore[attr-defined] + + alice = engineering_dept.employees[0] # type: ignore[attr-defined] + assert hasattr(alice, 'model_fields') + assert alice.name == "Alice" # type: ignore[attr-defined] + assert alice.role == "Senior Developer" # type: ignore[attr-defined] + assert alice.skills == ["Python", "JavaScript", "Docker"] # type: ignore[attr-defined] From fe3d308ac2b4d220afe5999663d276aac922d17f Mon Sep 17 00:00:00 2001 From: Eric Zhu Date: Wed, 17 Sep 2025 18:01:40 -0700 Subject: [PATCH 2/2] fix format and type --- .../autogen_core/utils/_json_to_pydantic.py | 4 +- .../tests/test_json_to_pydantic.py | 138 ++++++++---------- 2 files changed, 60 insertions(+), 82 deletions(-) diff --git a/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py b/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py index 5ac79beb47a0..e881d151a9fd 100644 --- a/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py +++ b/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py @@ -131,11 +131,11 @@ def get_ref(self, ref_name: str) -> Any: def _get_item_model_name(self, array_field_name: str, parent_model_name: str) -> str: """Generate hash-based model names for array items to keep names short and unique.""" import hashlib - + # Create a short hash of the full path to ensure uniqueness full_path = f"{parent_model_name}_{array_field_name}" hash_suffix = hashlib.md5(full_path.encode()).hexdigest()[:6] - + # Use field name as-is with hash suffix return f"{array_field_name}_{hash_suffix}" diff --git a/python/packages/autogen-core/tests/test_json_to_pydantic.py b/python/packages/autogen-core/tests/test_json_to_pydantic.py index 
e78667f49053..0efad58b4ebc 100644 --- a/python/packages/autogen-core/tests/test_json_to_pydantic.py +++ b/python/packages/autogen-core/tests/test_json_to_pydantic.py @@ -835,6 +835,7 @@ def test_unknown_format_raises() -> None: with pytest.raises(FormatNotSupportedError): converter.json_schema_to_pydantic(schema, "UnknownFormatModel") + def test_array_items_with_object_schema_properties() -> None: """Test that array items with object schemas create proper Pydantic models.""" schema = { @@ -844,24 +845,20 @@ def test_array_items_with_object_schema_properties() -> None: "type": "array", "items": { "type": "object", - "properties": { - "name": {"type": "string"}, - "email": {"type": "string"}, - "age": {"type": "integer"} - }, - "required": ["name", "email"] - } + "properties": {"name": {"type": "string"}, "email": {"type": "string"}, "age": {"type": "integer"}}, + "required": ["name", "email"], + }, } - } + }, } - + converter = _JSONSchemaToPydantic() Model = converter.json_schema_to_pydantic(schema, "UserListModel") - + # Verify the users field has correct type annotation users_field = Model.model_fields["users"] - from typing import get_args, get_origin, Union - + from typing import Union, get_args, get_origin + # Extract inner type from Optional[List[...]] actual_list_type = users_field.annotation if get_origin(users_field.annotation) is Union: @@ -870,34 +867,34 @@ def test_array_items_with_object_schema_properties() -> None: if get_origin(arg) is list: actual_list_type = arg break - + assert get_origin(actual_list_type) is list inner_type = get_args(actual_list_type)[0] - + # Verify array items are BaseModel subclasses, not dict assert inner_type is not dict - assert hasattr(inner_type, 'model_fields') - + assert hasattr(inner_type, "model_fields") + # Verify expected fields are present expected_fields = {"name", "email", "age"} actual_fields = set(inner_type.model_fields.keys()) assert expected_fields.issubset(actual_fields) - + # Test instantiation and field access test_data = { "users": [ {"name": "Alice", "email": "alice@example.com", "age": 30}, - {"name": "Bob", "email": "bob@example.com"} + {"name": "Bob", "email": "bob@example.com"}, ] } - + instance = Model(**test_data) assert len(instance.users) == 2 # type: ignore[attr-defined] - + first_user = instance.users[0] # type: ignore[attr-defined] - assert hasattr(first_user, 'model_fields') + assert hasattr(first_user, "model_fields") # type: ignore[reportUnknownArgumentType] assert not isinstance(first_user, dict) - + # Test attribute access (BaseModel behavior) assert first_user.name == "Alice" # type: ignore[attr-defined] assert first_user.email == "alice@example.com" # type: ignore[attr-defined] @@ -928,32 +925,29 @@ def test_nested_arrays_with_object_schemas() -> None: "properties": { "name": {"type": "string"}, "role": {"type": "string"}, - "skills": { - "type": "array", - "items": {"type": "string"} - } + "skills": {"type": "array", "items": {"type": "string"}}, }, - "required": ["name", "role"] - } - } + "required": ["name", "role"], + }, + }, }, - "required": ["name"] - } - } + "required": ["name"], + }, + }, }, - "required": ["name"] - } + "required": ["name"], + }, } - } + }, } - + converter = _JSONSchemaToPydantic() Model = converter.json_schema_to_pydantic(schema, "CompanyListModel") - + # Verify companies field type annotation companies_field = Model.model_fields["companies"] - from typing import get_args, get_origin, Union - + from typing import Union, get_args, get_origin + # Extract companies inner type 
actual_list_type = companies_field.annotation if get_origin(companies_field.annotation) is Union: @@ -962,17 +956,17 @@ def test_nested_arrays_with_object_schemas() -> None: if get_origin(arg) is list: actual_list_type = arg break - + assert get_origin(actual_list_type) is list company_type = get_args(actual_list_type)[0] - + # Verify companies are BaseModel subclasses assert company_type is not dict - assert hasattr(company_type, 'model_fields') + assert hasattr(company_type, "model_fields") assert "name" in company_type.model_fields assert "departments" in company_type.model_fields - - # Verify departments field type annotation + + # Verify departments field type annotation departments_field = company_type.model_fields["departments"] dept_list_type = departments_field.annotation if get_origin(dept_list_type) is Union: @@ -981,16 +975,16 @@ def test_nested_arrays_with_object_schemas() -> None: if get_origin(arg) is list: dept_list_type = arg break - + assert get_origin(dept_list_type) is list department_type = get_args(dept_list_type)[0] - + # Verify departments are BaseModel subclasses assert department_type is not dict - assert hasattr(department_type, 'model_fields') + assert hasattr(department_type, "model_fields") assert "name" in department_type.model_fields assert "employees" in department_type.model_fields - + # Verify employees field type annotation employees_field = department_type.model_fields["employees"] emp_list_type = employees_field.annotation @@ -1000,17 +994,17 @@ def test_nested_arrays_with_object_schemas() -> None: if get_origin(arg) is list: emp_list_type = arg break - + assert get_origin(emp_list_type) is list employee_type = get_args(emp_list_type)[0] - + # Verify employees are BaseModel subclasses assert employee_type is not dict - assert hasattr(employee_type, 'model_fields') + assert hasattr(employee_type, "model_fields") expected_emp_fields = {"name", "role", "skills"} actual_emp_fields = set(employee_type.model_fields.keys()) assert expected_emp_fields.issubset(actual_emp_fields) - + # Test instantiation with nested data test_data = { "companies": [ @@ -1020,47 +1014,31 @@ def test_nested_arrays_with_object_schemas() -> None: { "name": "Engineering", "employees": [ - { - "name": "Alice", - "role": "Senior Developer", - "skills": ["Python", "JavaScript", "Docker"] - }, - { - "name": "Bob", - "role": "DevOps Engineer", - "skills": ["Kubernetes", "AWS"] - } - ] + {"name": "Alice", "role": "Senior Developer", "skills": ["Python", "JavaScript", "Docker"]}, + {"name": "Bob", "role": "DevOps Engineer", "skills": ["Kubernetes", "AWS"]}, + ], }, - { - "name": "Marketing", - "employees": [ - { - "name": "Carol", - "role": "Marketing Manager" - } - ] - } - ] + {"name": "Marketing", "employees": [{"name": "Carol", "role": "Marketing Manager"}]}, + ], } ] } - + instance = Model(**test_data) assert len(instance.companies) == 1 # type: ignore[attr-defined] - + company = instance.companies[0] # type: ignore[attr-defined] - assert hasattr(company, 'model_fields') + assert hasattr(company, "model_fields") # type: ignore[reportUnknownArgumentType] assert company.name == "TechCorp" # type: ignore[attr-defined] assert len(company.departments) == 2 # type: ignore[attr-defined] - + engineering_dept = company.departments[0] # type: ignore[attr-defined] - assert hasattr(engineering_dept, 'model_fields') + assert hasattr(engineering_dept, "model_fields") # type: ignore[reportUnknownArgumentType] assert engineering_dept.name == "Engineering" # type: ignore[attr-defined] assert 
len(engineering_dept.employees) == 2 # type: ignore[attr-defined] - + alice = engineering_dept.employees[0] # type: ignore[attr-defined] - assert hasattr(alice, 'model_fields') + assert hasattr(alice, "model_fields") # type: ignore[reportUnknownArgumentType] assert alice.name == "Alice" # type: ignore[attr-defined] assert alice.role == "Senior Developer" # type: ignore[attr-defined] assert alice.skills == ["Python", "JavaScript", "Docker"] # type: ignore[attr-defined]
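
For reference, a minimal end-to-end sketch of the behavior these two patches add, condensed from the new tests above: array items declared as inline object schemas are converted into nested Pydantic models instead of plain dicts, so each item supports attribute access. The converter class name and method come from the test module; the exact import path is an assumption based on the file location in the diff, not something stated in the patch itself.

    # Illustrative sketch only, not part of either commit.
    # Assumes _JSONSchemaToPydantic is importable from the module patched above.
    from autogen_core.utils._json_to_pydantic import _JSONSchemaToPydantic

    schema = {
        "type": "object",
        "properties": {
            "users": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {"name": {"type": "string"}, "email": {"type": "string"}},
                    "required": ["name", "email"],
                },
            }
        },
    }

    converter = _JSONSchemaToPydantic()
    UserList = converter.json_schema_to_pydantic(schema, "UserListModel")

    instance = UserList(users=[{"name": "Alice", "email": "alice@example.com"}])
    # With this change, each entry in `users` is a generated nested model
    # (named via the hash-based helper), so attribute access works:
    print(instance.users[0].name)  # -> "Alice"

Per the assertions in the tests, the same schema previously yielded items typed as plain dicts, which is the behavior reported in issue #6991.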