From ad7fc0a9b12fccf43380ebea50916c9f179bd9f0 Mon Sep 17 00:00:00 2001 From: ckkut001_2 Date: Fri, 5 Sep 2025 10:57:51 +0000 Subject: [PATCH 1/2] Fix: Handle nested objects in array items for JSON schema conversion - Modified _extract_field_type to recursively process object schemas in arrays - Added test case for array items with defined object properties - Fixes issue #6991 --- .../autogen_core/utils/_json_to_pydantic.py | 16 ++ .../tests/test_json_to_pydantic.py | 230 ++++++++++++++++++ 2 files changed, 246 insertions(+) diff --git a/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py b/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py index 0342a49e2b87..5ac79beb47a0 100644 --- a/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py +++ b/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py @@ -128,6 +128,17 @@ def get_ref(self, ref_name: str) -> Any: return self._model_cache[ref_name] + def _get_item_model_name(self, array_field_name: str, parent_model_name: str) -> str: + """Generate hash-based model names for array items to keep names short and unique.""" + import hashlib + + # Create a short hash of the full path to ensure uniqueness + full_path = f"{parent_model_name}_{array_field_name}" + hash_suffix = hashlib.md5(full_path.encode()).hexdigest()[:6] + + # Use field name as-is with hash suffix + return f"{array_field_name}_{hash_suffix}" + def _process_definitions(self, root_schema: Dict[str, Any]) -> None: if "$defs" in root_schema: for model_name in root_schema["$defs"]: @@ -253,6 +264,11 @@ def _extract_field_type(self, key: str, value: Dict[str, Any], model_name: str, item_schema = value.get("items", {"type": "string"}) if "$ref" in item_schema: item_type = self.get_ref(item_schema["$ref"].split("/")[-1]) + elif item_schema.get("type") == "object" and "properties" in item_schema: + # Handle array items that are objects with properties - create a nested model + # Use hash-based naming to keep names short and unique + item_model_name = self._get_item_model_name(key, model_name) + item_type = self._json_schema_to_model(item_schema, item_model_name, root_schema) else: item_type_name = item_schema.get("type") if item_type_name is None: diff --git a/python/packages/autogen-core/tests/test_json_to_pydantic.py b/python/packages/autogen-core/tests/test_json_to_pydantic.py index 0387e228e028..e78667f49053 100644 --- a/python/packages/autogen-core/tests/test_json_to_pydantic.py +++ b/python/packages/autogen-core/tests/test_json_to_pydantic.py @@ -834,3 +834,233 @@ def test_unknown_format_raises() -> None: converter = _JSONSchemaToPydantic() with pytest.raises(FormatNotSupportedError): converter.json_schema_to_pydantic(schema, "UnknownFormatModel") + +def test_array_items_with_object_schema_properties() -> None: + """Test that array items with object schemas create proper Pydantic models.""" + schema = { + "type": "object", + "properties": { + "users": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "email": {"type": "string"}, + "age": {"type": "integer"} + }, + "required": ["name", "email"] + } + } + } + } + + converter = _JSONSchemaToPydantic() + Model = converter.json_schema_to_pydantic(schema, "UserListModel") + + # Verify the users field has correct type annotation + users_field = Model.model_fields["users"] + from typing import get_args, get_origin, Union + + # Extract inner type from Optional[List[...]] + actual_list_type = 
users_field.annotation + if get_origin(users_field.annotation) is Union: + union_args = get_args(users_field.annotation) + for arg in union_args: + if get_origin(arg) is list: + actual_list_type = arg + break + + assert get_origin(actual_list_type) is list + inner_type = get_args(actual_list_type)[0] + + # Verify array items are BaseModel subclasses, not dict + assert inner_type is not dict + assert hasattr(inner_type, 'model_fields') + + # Verify expected fields are present + expected_fields = {"name", "email", "age"} + actual_fields = set(inner_type.model_fields.keys()) + assert expected_fields.issubset(actual_fields) + + # Test instantiation and field access + test_data = { + "users": [ + {"name": "Alice", "email": "alice@example.com", "age": 30}, + {"name": "Bob", "email": "bob@example.com"} + ] + } + + instance = Model(**test_data) + assert len(instance.users) == 2 # type: ignore[attr-defined] + + first_user = instance.users[0] # type: ignore[attr-defined] + assert hasattr(first_user, 'model_fields') + assert not isinstance(first_user, dict) + + # Test attribute access (BaseModel behavior) + assert first_user.name == "Alice" # type: ignore[attr-defined] + assert first_user.email == "alice@example.com" # type: ignore[attr-defined] + assert first_user.age == 30 # type: ignore[attr-defined] + + +def test_nested_arrays_with_object_schemas() -> None: + """Test deeply nested arrays with object schemas create proper Pydantic models.""" + schema = { + "type": "object", + "properties": { + "companies": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "departments": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "employees": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "role": {"type": "string"}, + "skills": { + "type": "array", + "items": {"type": "string"} + } + }, + "required": ["name", "role"] + } + } + }, + "required": ["name"] + } + } + }, + "required": ["name"] + } + } + } + } + + converter = _JSONSchemaToPydantic() + Model = converter.json_schema_to_pydantic(schema, "CompanyListModel") + + # Verify companies field type annotation + companies_field = Model.model_fields["companies"] + from typing import get_args, get_origin, Union + + # Extract companies inner type + actual_list_type = companies_field.annotation + if get_origin(companies_field.annotation) is Union: + union_args = get_args(companies_field.annotation) + for arg in union_args: + if get_origin(arg) is list: + actual_list_type = arg + break + + assert get_origin(actual_list_type) is list + company_type = get_args(actual_list_type)[0] + + # Verify companies are BaseModel subclasses + assert company_type is not dict + assert hasattr(company_type, 'model_fields') + assert "name" in company_type.model_fields + assert "departments" in company_type.model_fields + + # Verify departments field type annotation + departments_field = company_type.model_fields["departments"] + dept_list_type = departments_field.annotation + if get_origin(dept_list_type) is Union: + union_args = get_args(dept_list_type) + for arg in union_args: + if get_origin(arg) is list: + dept_list_type = arg + break + + assert get_origin(dept_list_type) is list + department_type = get_args(dept_list_type)[0] + + # Verify departments are BaseModel subclasses + assert department_type is not dict + assert hasattr(department_type, 'model_fields') + assert "name" in department_type.model_fields + assert 
"employees" in department_type.model_fields + + # Verify employees field type annotation + employees_field = department_type.model_fields["employees"] + emp_list_type = employees_field.annotation + if get_origin(emp_list_type) is Union: + union_args = get_args(emp_list_type) + for arg in union_args: + if get_origin(arg) is list: + emp_list_type = arg + break + + assert get_origin(emp_list_type) is list + employee_type = get_args(emp_list_type)[0] + + # Verify employees are BaseModel subclasses + assert employee_type is not dict + assert hasattr(employee_type, 'model_fields') + expected_emp_fields = {"name", "role", "skills"} + actual_emp_fields = set(employee_type.model_fields.keys()) + assert expected_emp_fields.issubset(actual_emp_fields) + + # Test instantiation with nested data + test_data = { + "companies": [ + { + "name": "TechCorp", + "departments": [ + { + "name": "Engineering", + "employees": [ + { + "name": "Alice", + "role": "Senior Developer", + "skills": ["Python", "JavaScript", "Docker"] + }, + { + "name": "Bob", + "role": "DevOps Engineer", + "skills": ["Kubernetes", "AWS"] + } + ] + }, + { + "name": "Marketing", + "employees": [ + { + "name": "Carol", + "role": "Marketing Manager" + } + ] + } + ] + } + ] + } + + instance = Model(**test_data) + assert len(instance.companies) == 1 # type: ignore[attr-defined] + + company = instance.companies[0] # type: ignore[attr-defined] + assert hasattr(company, 'model_fields') + assert company.name == "TechCorp" # type: ignore[attr-defined] + assert len(company.departments) == 2 # type: ignore[attr-defined] + + engineering_dept = company.departments[0] # type: ignore[attr-defined] + assert hasattr(engineering_dept, 'model_fields') + assert engineering_dept.name == "Engineering" # type: ignore[attr-defined] + assert len(engineering_dept.employees) == 2 # type: ignore[attr-defined] + + alice = engineering_dept.employees[0] # type: ignore[attr-defined] + assert hasattr(alice, 'model_fields') + assert alice.name == "Alice" # type: ignore[attr-defined] + assert alice.role == "Senior Developer" # type: ignore[attr-defined] + assert alice.skills == ["Python", "JavaScript", "Docker"] # type: ignore[attr-defined] From fe3d308ac2b4d220afe5999663d276aac922d17f Mon Sep 17 00:00:00 2001 From: Eric Zhu Date: Wed, 17 Sep 2025 18:01:40 -0700 Subject: [PATCH 2/2] fix format and type --- .../autogen_core/utils/_json_to_pydantic.py | 4 +- .../tests/test_json_to_pydantic.py | 138 ++++++++---------- 2 files changed, 60 insertions(+), 82 deletions(-) diff --git a/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py b/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py index 5ac79beb47a0..e881d151a9fd 100644 --- a/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py +++ b/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py @@ -131,11 +131,11 @@ def get_ref(self, ref_name: str) -> Any: def _get_item_model_name(self, array_field_name: str, parent_model_name: str) -> str: """Generate hash-based model names for array items to keep names short and unique.""" import hashlib - + # Create a short hash of the full path to ensure uniqueness full_path = f"{parent_model_name}_{array_field_name}" hash_suffix = hashlib.md5(full_path.encode()).hexdigest()[:6] - + # Use field name as-is with hash suffix return f"{array_field_name}_{hash_suffix}" diff --git a/python/packages/autogen-core/tests/test_json_to_pydantic.py b/python/packages/autogen-core/tests/test_json_to_pydantic.py index 
e78667f49053..0efad58b4ebc 100644 --- a/python/packages/autogen-core/tests/test_json_to_pydantic.py +++ b/python/packages/autogen-core/tests/test_json_to_pydantic.py @@ -835,6 +835,7 @@ def test_unknown_format_raises() -> None: with pytest.raises(FormatNotSupportedError): converter.json_schema_to_pydantic(schema, "UnknownFormatModel") + def test_array_items_with_object_schema_properties() -> None: """Test that array items with object schemas create proper Pydantic models.""" schema = { @@ -844,24 +845,20 @@ def test_array_items_with_object_schema_properties() -> None: "type": "array", "items": { "type": "object", - "properties": { - "name": {"type": "string"}, - "email": {"type": "string"}, - "age": {"type": "integer"} - }, - "required": ["name", "email"] - } + "properties": {"name": {"type": "string"}, "email": {"type": "string"}, "age": {"type": "integer"}}, + "required": ["name", "email"], + }, } - } + }, } - + converter = _JSONSchemaToPydantic() Model = converter.json_schema_to_pydantic(schema, "UserListModel") - + # Verify the users field has correct type annotation users_field = Model.model_fields["users"] - from typing import get_args, get_origin, Union - + from typing import Union, get_args, get_origin + # Extract inner type from Optional[List[...]] actual_list_type = users_field.annotation if get_origin(users_field.annotation) is Union: @@ -870,34 +867,34 @@ def test_array_items_with_object_schema_properties() -> None: if get_origin(arg) is list: actual_list_type = arg break - + assert get_origin(actual_list_type) is list inner_type = get_args(actual_list_type)[0] - + # Verify array items are BaseModel subclasses, not dict assert inner_type is not dict - assert hasattr(inner_type, 'model_fields') - + assert hasattr(inner_type, "model_fields") + # Verify expected fields are present expected_fields = {"name", "email", "age"} actual_fields = set(inner_type.model_fields.keys()) assert expected_fields.issubset(actual_fields) - + # Test instantiation and field access test_data = { "users": [ {"name": "Alice", "email": "alice@example.com", "age": 30}, - {"name": "Bob", "email": "bob@example.com"} + {"name": "Bob", "email": "bob@example.com"}, ] } - + instance = Model(**test_data) assert len(instance.users) == 2 # type: ignore[attr-defined] - + first_user = instance.users[0] # type: ignore[attr-defined] - assert hasattr(first_user, 'model_fields') + assert hasattr(first_user, "model_fields") # type: ignore[reportUnknownArgumentType] assert not isinstance(first_user, dict) - + # Test attribute access (BaseModel behavior) assert first_user.name == "Alice" # type: ignore[attr-defined] assert first_user.email == "alice@example.com" # type: ignore[attr-defined] @@ -928,32 +925,29 @@ def test_nested_arrays_with_object_schemas() -> None: "properties": { "name": {"type": "string"}, "role": {"type": "string"}, - "skills": { - "type": "array", - "items": {"type": "string"} - } + "skills": {"type": "array", "items": {"type": "string"}}, }, - "required": ["name", "role"] - } - } + "required": ["name", "role"], + }, + }, }, - "required": ["name"] - } - } + "required": ["name"], + }, + }, }, - "required": ["name"] - } + "required": ["name"], + }, } - } + }, } - + converter = _JSONSchemaToPydantic() Model = converter.json_schema_to_pydantic(schema, "CompanyListModel") - + # Verify companies field type annotation companies_field = Model.model_fields["companies"] - from typing import get_args, get_origin, Union - + from typing import Union, get_args, get_origin + # Extract companies inner type 
actual_list_type = companies_field.annotation if get_origin(companies_field.annotation) is Union: @@ -962,17 +956,17 @@ def test_nested_arrays_with_object_schemas() -> None: if get_origin(arg) is list: actual_list_type = arg break - + assert get_origin(actual_list_type) is list company_type = get_args(actual_list_type)[0] - + # Verify companies are BaseModel subclasses assert company_type is not dict - assert hasattr(company_type, 'model_fields') + assert hasattr(company_type, "model_fields") assert "name" in company_type.model_fields assert "departments" in company_type.model_fields - - # Verify departments field type annotation + + # Verify departments field type annotation departments_field = company_type.model_fields["departments"] dept_list_type = departments_field.annotation if get_origin(dept_list_type) is Union: @@ -981,16 +975,16 @@ def test_nested_arrays_with_object_schemas() -> None: if get_origin(arg) is list: dept_list_type = arg break - + assert get_origin(dept_list_type) is list department_type = get_args(dept_list_type)[0] - + # Verify departments are BaseModel subclasses assert department_type is not dict - assert hasattr(department_type, 'model_fields') + assert hasattr(department_type, "model_fields") assert "name" in department_type.model_fields assert "employees" in department_type.model_fields - + # Verify employees field type annotation employees_field = department_type.model_fields["employees"] emp_list_type = employees_field.annotation @@ -1000,17 +994,17 @@ def test_nested_arrays_with_object_schemas() -> None: if get_origin(arg) is list: emp_list_type = arg break - + assert get_origin(emp_list_type) is list employee_type = get_args(emp_list_type)[0] - + # Verify employees are BaseModel subclasses assert employee_type is not dict - assert hasattr(employee_type, 'model_fields') + assert hasattr(employee_type, "model_fields") expected_emp_fields = {"name", "role", "skills"} actual_emp_fields = set(employee_type.model_fields.keys()) assert expected_emp_fields.issubset(actual_emp_fields) - + # Test instantiation with nested data test_data = { "companies": [ @@ -1020,47 +1014,31 @@ def test_nested_arrays_with_object_schemas() -> None: { "name": "Engineering", "employees": [ - { - "name": "Alice", - "role": "Senior Developer", - "skills": ["Python", "JavaScript", "Docker"] - }, - { - "name": "Bob", - "role": "DevOps Engineer", - "skills": ["Kubernetes", "AWS"] - } - ] + {"name": "Alice", "role": "Senior Developer", "skills": ["Python", "JavaScript", "Docker"]}, + {"name": "Bob", "role": "DevOps Engineer", "skills": ["Kubernetes", "AWS"]}, + ], }, - { - "name": "Marketing", - "employees": [ - { - "name": "Carol", - "role": "Marketing Manager" - } - ] - } - ] + {"name": "Marketing", "employees": [{"name": "Carol", "role": "Marketing Manager"}]}, + ], } ] } - + instance = Model(**test_data) assert len(instance.companies) == 1 # type: ignore[attr-defined] - + company = instance.companies[0] # type: ignore[attr-defined] - assert hasattr(company, 'model_fields') + assert hasattr(company, "model_fields") # type: ignore[reportUnknownArgumentType] assert company.name == "TechCorp" # type: ignore[attr-defined] assert len(company.departments) == 2 # type: ignore[attr-defined] - + engineering_dept = company.departments[0] # type: ignore[attr-defined] - assert hasattr(engineering_dept, 'model_fields') + assert hasattr(engineering_dept, "model_fields") # type: ignore[reportUnknownArgumentType] assert engineering_dept.name == "Engineering" # type: ignore[attr-defined] assert 
len(engineering_dept.employees) == 2 # type: ignore[attr-defined] - + alice = engineering_dept.employees[0] # type: ignore[attr-defined] - assert hasattr(alice, 'model_fields') + assert hasattr(alice, "model_fields") # type: ignore[reportUnknownArgumentType] assert alice.name == "Alice" # type: ignore[attr-defined] assert alice.role == "Senior Developer" # type: ignore[attr-defined] assert alice.skills == ["Python", "JavaScript", "Docker"] # type: ignore[attr-defined]
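
For reference, a minimal end-to-end sketch of the behavior these two patches add, condensed from the new tests above: array items declared as inline object schemas are converted into nested Pydantic models instead of plain dicts, so each item supports attribute access. The converter class name and method come from the test module; the exact import path is an assumption based on the file location in the diff, not something stated in the patch itself.

    # Illustrative sketch only, not part of either commit.
    # Assumes _JSONSchemaToPydantic is importable from the module patched above.
    from autogen_core.utils._json_to_pydantic import _JSONSchemaToPydantic

    schema = {
        "type": "object",
        "properties": {
            "users": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {"name": {"type": "string"}, "email": {"type": "string"}},
                    "required": ["name", "email"],
                },
            }
        },
    }

    converter = _JSONSchemaToPydantic()
    UserList = converter.json_schema_to_pydantic(schema, "UserListModel")

    instance = UserList(users=[{"name": "Alice", "email": "alice@example.com"}])
    # With this change, each entry in `users` is a generated nested model
    # (named via the hash-based helper), so attribute access works:
    print(instance.users[0].name)  # -> "Alice"

Per the assertions in the tests, the same schema previously yielded items typed as plain dicts, which is the behavior reported in issue #6991.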