diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate_aoai.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate_aoai.py
index 766779f179d3..618509ede551 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate_aoai.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate_aoai.py
@@ -2,6 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 
+import json
 import logging
 import re
 
@@ -60,6 +61,7 @@ def _split_evaluators_and_grader_configs(
     :return: Tuple of two dictionaries, the first containing evaluators and the second containing AOAI graders.
     :rtype: Tuple[Dict[str, Callable], Dict[str, AoaiGrader]]
     """
+    LOGGER.info(f"AOAI: Splitting {len(evaluators)} evaluators into AOAI graders and standard evaluators...")
     true_evaluators = {}
     aoai_graders = {}
     for key, value in evaluators.items():
@@ -67,6 +69,7 @@ def _split_evaluators_and_grader_configs(
             aoai_graders[key] = value
         else:
             true_evaluators[key] = value
+    LOGGER.info(f"AOAI: Found {len(aoai_graders)} AOAI graders and {len(true_evaluators)} standard evaluators.")
     return true_evaluators, aoai_graders
 
 
@@ -103,11 +106,18 @@ def _begin_aoai_evaluation(
     LOGGER.info("AOAI: Aoai graders detected among evaluator inputs. Preparing to create OAI eval group...")
     all_eval_run_info: List[OAIEvalRunCreationInfo] = []
 
-    for selected_graders, selected_column_mapping in _get_graders_and_column_mappings(graders, column_mappings):
+    grader_mapping_list = list(_get_graders_and_column_mappings(graders, column_mappings))
+    LOGGER.info(f"AOAI: Will create {len(grader_mapping_list)} separate evaluation run(s) based on column mappings.")
+
+    for idx, (selected_graders, selected_column_mapping) in enumerate(grader_mapping_list):
+        LOGGER.info(
+            f"AOAI: Starting evaluation run {idx + 1}/{len(grader_mapping_list)} with {len(selected_graders)} grader(s)..."
+        )
         all_eval_run_info.append(
             _begin_single_aoai_evaluation(selected_graders, data, selected_column_mapping, run_name)
         )
 
+    LOGGER.info(f"AOAI: Successfully created {len(all_eval_run_info)} evaluation run(s).")
     return all_eval_run_info
 
 
@@ -133,6 +143,7 @@ def _begin_single_aoai_evaluation(
     """
 
     # Format data for eval group creation
+    LOGGER.info(f"AOAI: Preparing evaluation for {len(graders)} grader(s): {list(graders.keys())}")
     grader_name_list = []
     grader_list = []
     # It's expected that all graders supplied for a single eval run use the same credentials
@@ -143,10 +154,12 @@ def _begin_single_aoai_evaluation(
         grader_name_list.append(name)
         grader_list.append(grader._grader_config)
     effective_column_mapping: Dict[str, str] = column_mapping or {}
+    LOGGER.info(f"AOAI: Generating data source config with {len(effective_column_mapping)} column mapping(s)...")
     data_source_config = _generate_data_source_config(data, effective_column_mapping)
+    LOGGER.info(f"AOAI: Data source config generated with schema type: {data_source_config.get('type')}")
 
     # Create eval group
-    # import pdb; pdb.set_trace()
+    LOGGER.info(f"AOAI: Creating eval group with {len(grader_list)} testing criteria...")
     eval_group_info = client.evals.create(
         data_source_config=data_source_config, testing_criteria=grader_list, metadata={"is_foundry_eval": "true"}
     )
@@ -167,6 +180,7 @@ def _begin_single_aoai_evaluation(
         grader_name_map[criteria.id] = name
 
     # Create eval run
+    LOGGER.info(f"AOAI: Creating eval run '{run_name}' with {len(data)} data rows...")
     eval_run_id = _begin_eval_run(client, eval_group_info.id, run_name, data, effective_column_mapping)
     LOGGER.info(
         f"AOAI: Eval run created with id {eval_run_id}."
@@ -197,13 +211,16 @@ def _get_evaluation_run_results(all_run_info: List[OAIEvalRunCreationInfo]) -> T
     :raises EvaluationException: If the evaluation run fails or is not completed before timing out.
     """
 
+    LOGGER.info(f"AOAI: Retrieving results from {len(all_run_info)} evaluation run(s)...")
     run_metrics = {}
     output_df = pd.DataFrame()
-    for run_info in all_run_info:
+    for idx, run_info in enumerate(all_run_info):
+        LOGGER.info(f"AOAI: Fetching results for run {idx + 1}/{len(all_run_info)} (ID: {run_info['eval_run_id']})...")
         cur_output_df, cur_run_metrics = _get_single_run_results(run_info)
         output_df = pd.concat([output_df, cur_output_df], axis=1)
         run_metrics.update(cur_run_metrics)
 
+    LOGGER.info(f"AOAI: Successfully retrieved all results. Combined dataframe shape: {output_df.shape}")
     return output_df, run_metrics
 
 
@@ -223,8 +240,10 @@ def _get_single_run_results(
     :raises EvaluationException: If the evaluation run fails or is not completed before timing out.
     """
     # Wait for evaluation run to complete
+    LOGGER.info(f"AOAI: Waiting for eval run {run_info['eval_run_id']} to complete...")
     run_results = _wait_for_run_conclusion(run_info["client"], run_info["eval_group_id"], run_info["eval_run_id"])
 
+    LOGGER.info(f"AOAI: Eval run {run_info['eval_run_id']} completed with status: {run_results.status}")
     if run_results.status != "completed":
         raise EvaluationException(
             message=f"AOAI evaluation run {run_info['eval_group_id']}/{run_info['eval_run_id']}"
@@ -235,6 +254,7 @@ def _get_single_run_results(
         )
 
     # Convert run results into a dictionary of metrics
+    LOGGER.info(f"AOAI: Processing results and calculating metrics for run {run_info['eval_run_id']}...")
     run_metrics: Dict[str, Any] = {}
     if run_results.per_testing_criteria_results is None:
         msg = (
@@ -255,8 +275,10 @@ def _get_single_run_results(
         ratio = passed / (passed + failed) if (passed + failed) else 0.0
         formatted_column_name = f"{grader_name}.pass_rate"
         run_metrics[formatted_column_name] = ratio
+        LOGGER.info(f"AOAI: Grader '{grader_name}': {passed} passed, {failed} failed, pass_rate={ratio:.4f}")
 
     # Collect all results with pagination
+    LOGGER.info(f"AOAI: Collecting output items for run {run_info['eval_run_id']} with pagination...")
     all_results: List[Any] = []
     next_cursor: Optional[str] = None
     limit = 100  # Max allowed by API
@@ -280,6 +302,7 @@ def _get_single_run_results(
         else:
             break
 
+    LOGGER.info(f"AOAI: Collected {len(all_results)} total output items across all pages.")
     listed_results: Dict[str, List[Any]] = {"index": []}
     # Raw data has no order guarantees; capture datasource_item_id per row for ordering.
     for row_result in all_results:
@@ -329,6 +352,7 @@ def _get_single_run_results(
 
     # Ensure all columns are the same length as the 'index' list
     num_rows = len(listed_results["index"])
+    LOGGER.info(f"AOAI: Processing {num_rows} result rows into dataframe...")
     for col_name in list(listed_results.keys()):
         if col_name != "index":
             col_length = len(listed_results[col_name])
@@ -356,6 +380,7 @@ def _get_single_run_results(
     expected = run_info.get("expected_rows", None)
     if expected is not None:
         pre_len = len(output_df)
+        LOGGER.info(f"AOAI: Validating result count: expected {expected} rows, received {pre_len} rows.")
         # Assumes original datasource_item_id space is 0..expected-1
         output_df = output_df.reindex(range(expected))
         if pre_len != expected:
@@ -388,6 +413,9 @@ def _get_single_run_results(
 
     # Reset to RangeIndex so downstream concatenation aligns on position
     output_df.reset_index(drop=True, inplace=True)
+    LOGGER.info(
+        f"AOAI: Successfully processed run {run_info['eval_run_id']} with final dataframe shape: {output_df.shape}"
+    )
     return output_df, run_metrics
 
 
@@ -481,11 +509,16 @@ def _get_graders_and_column_mappings(
     :rtype: List[Tuple[Dict[str, AoaiGrader], Optional[Dict[str, str]]]]
     """
 
+    LOGGER.info(f"AOAI: Organizing {len(graders)} graders with column mappings...")
     if column_mappings is None:
+        LOGGER.info("AOAI: No column mappings provided, each grader will have its own eval run.")
         return [({name: grader}, None) for name, grader in graders.items()]
     default_mapping = column_mappings.get("default", None)
     if default_mapping is None:
         default_mapping = {}
+    LOGGER.info(
+        f"AOAI: Using default mapping with {len(default_mapping)} entries for graders without specific mappings."
+    )
     return [
         ({name: grader}, None if column_mappings is None else column_mappings.get(name, default_mapping))
         for name, grader in graders.items()
@@ -593,17 +626,23 @@ def _generate_data_source_config(input_data_df: pd.DataFrame, column_mapping: Di
     helper function.
     """
     # Extract referenced data paths from mapping values of the form ${data.<path>} (ignore ${run.outputs.*})
+    LOGGER.info(
+        f"AOAI: Generating data source config for {len(input_data_df)} rows with {len(column_mapping)} column mapping(s)..."
+    )
     referenced_paths: List[str] = []
     for v in column_mapping.values():
         m = DATA_PATH_PATTERN.match(v)
         if m:
             referenced_paths.append(m.group(1))
 
+    LOGGER.info(f"AOAI: Found {len(referenced_paths)} referenced paths in column mappings: {referenced_paths}")
     # Decide if we have nested structures
     has_nested = any("." in p for p in referenced_paths)
+    LOGGER.info(f"AOAI: Schema generation mode: {'nested' if has_nested else 'flat'}")
 
     if not referenced_paths or not has_nested:
         # Legacy flat behavior (existing logic): treat each mapping key as independent string field
+        LOGGER.info("AOAI: Using flat schema generation (no nested structures detected).")
         data_source_config = {
             "type": "custom",
             "item_schema": {
@@ -617,6 +656,7 @@ def _generate_data_source_config(input_data_df: pd.DataFrame, column_mapping: Di
         for key in column_mapping.keys():
             props[key] = {"type": "string"}
             req.append(key)
+        LOGGER.info(f"AOAI: Flat schema generated with {len(props)} properties: {list(props.keys())}")
         return data_source_config
 
     # NEW: If all nested paths share the same first segment (e.g. 'item'),
@@ -625,12 +665,14 @@ def _generate_data_source_config(input_data_df: pd.DataFrame, column_mapping: Di
     first_segments = {p.split(".")[0] for p in referenced_paths}
     strip_wrapper = False
     wrapper_name = None
+    LOGGER.info(f"AOAI: First segments in referenced paths: {first_segments}")
     if len(first_segments) == 1:
         only_seg = next(iter(first_segments))
         # We only strip if that segment looks like the canonical wrapper.
         if only_seg == WRAPPER_KEY:
             strip_wrapper = True
             wrapper_name = only_seg
+            LOGGER.info(f"AOAI: All paths start with wrapper '{WRAPPER_KEY}', will strip from schema.")
 
     effective_paths = referenced_paths
     if strip_wrapper:
@@ -645,9 +687,12 @@ def _generate_data_source_config(input_data_df: pd.DataFrame, column_mapping: Di
         # If stripping produced at least one usable path, adopt; else fall back to original.
         if stripped:
             effective_paths = stripped
+            LOGGER.info(f"AOAI: Effective paths after stripping wrapper: {effective_paths}")
 
+    LOGGER.info(f"AOAI: Building nested schema from {len(effective_paths)} effective paths...")
     nested_schema = _build_schema_tree_from_paths(effective_paths, force_leaf_type="string")
 
+    LOGGER.info(f"AOAI: Nested schema generated successfully with type '{nested_schema.get('type')}'")
     return {
         "type": "custom",
         "item_schema": nested_schema,
@@ -697,6 +742,23 @@ def _get_data_source(input_data_df: pd.DataFrame, column_mapping: Dict[str, str]
     :return: A dictionary that can be used as the data source input for an OAI evaluation run.
     :rtype: Dict[str, Any]
     """
+
+    def _convert_value_to_string(val: Any) -> str:
+        """Stringify a cell for AOAI; None and float NaN (missing cells) become the empty string."""
+        # pandas marks missing cells with float NaN; without this check they would be sent as "nan".
+        if val is None or (isinstance(val, float) and pd.isna(val)):
+            return ""
+        if isinstance(val, (str, int, float)):  # bool is an int subclass, so it is covered too
+            return str(val)
+        try:  # Lists/dicts are emitted as JSON rather than as a Python repr
+            return json.dumps(val, ensure_ascii=False)
+        except (TypeError, ValueError):
+            # Fallback for objects json cannot serialize
+            return str(val)
+
+    LOGGER.info(
+        f"AOAI: Building data source from {len(input_data_df)} rows with {len(column_mapping)} column mappings..."
+    )
     # Gather path specs: list of tuples (original_mapping_value, relative_parts, dataframe_column_name)
     # relative_parts excludes the wrapper (so schema + content align).
     path_specs: List[Tuple[str, List[str], str]] = []
@@ -746,24 +808,21 @@ def _get_data_source(input_data_df: pd.DataFrame, column_mapping: Dict[str, str]
             leaf_name = pieces[-1]
             path_specs.append((formatted_entry, [leaf_name], run_col))
 
+    LOGGER.info(f"AOAI: Processed {len(path_specs)} path specifications from column mappings.")
     content: List[Dict[str, Any]] = []
 
     for _, row in input_data_df.iterrows():
         item_root: Dict[str, Any] = {}
 
+        # Track which dataframe columns have been processed via column_mapping
+        processed_cols: Set[str] = set()
+
         for _, rel_parts, df_col in path_specs:
             # Safely fetch value
             val = row.get(df_col, None)
 
             # Convert value to string to match schema's "type": "string" leaves.
-            # (If you later infer types, you can remove the stringify.)
-            if val is None:
-                str_val = ""
-            elif isinstance(val, (str, int, float, bool)):
-                str_val = str(val)
-            else:
-                # Lists / dicts / other -> string for now
-                str_val = str(val)
+            str_val = _convert_value_to_string(val)
 
             # Insert into nested dict
             cursor = item_root
@@ -776,8 +835,19 @@ def _get_data_source(input_data_df: pd.DataFrame, column_mapping: Dict[str, str]
             leaf_key = rel_parts[-1]
             cursor[leaf_key] = str_val
 
+            # Mark this dataframe column as processed
+            processed_cols.add(df_col)
+
+        # Add any unmapped dataframe columns directly to item_root
+        for col_name in input_data_df.columns:
+            if col_name not in processed_cols:
+                val = row.get(col_name, None)
+                str_val = _convert_value_to_string(val)
+                item_root[col_name] = str_val
+
         content.append({WRAPPER_KEY: item_root})
 
+    LOGGER.info(f"AOAI: Generated {len(content)} content items for data source.")
     return {
         "type": "jsonl",
         "source": {
@@ -812,6 +882,7 @@ def _begin_eval_run(
     :rtype: str
     """
 
+    LOGGER.info(f"AOAI: Creating eval run '{run_name}' for eval group {eval_group_id}...")
     data_source = _get_data_source(input_data_df, column_mapping)
     eval_run = client.evals.runs.create(
         eval_id=eval_group_id,
@@ -820,6 +891,7 @@ def _begin_eval_run(
         metadata={"sample_generation": "off", "file_format": "jsonl", "is_foundry_eval": "true"},
         # TODO decide if we want to add our own timeout value?
     )
+    LOGGER.info(f"AOAI: Eval run created successfully with ID: {eval_run.id}")
     return eval_run.id
 
 
@@ -856,8 +928,11 @@ def _wait_for_run_conclusion(
         if total_wait > max_wait_seconds:
             wait_interval -= total_wait - max_wait_seconds
         sleep(wait_interval)
+        iters += 1
         response = client.evals.runs.retrieve(eval_id=eval_group_id, run_id=eval_run_id)
+        LOGGER.info(f"AOAI: Polling iteration {iters}, status: {response.status}, total wait: {total_wait:.1f}s")
         if response.status not in ["queued", "in_progress"]:
+            LOGGER.info(f"AOAI: Eval run {eval_run_id} reached terminal status: {response.status}")
             return response
         if total_wait > max_wait_seconds:
             raise EvaluationException(
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/data/flat_test_data.jsonl b/sdk/evaluation/azure-ai-evaluation/tests/unittests/data/flat_test_data.jsonl
new file mode 100644
index 000000000000..1ae330125fc8
--- /dev/null
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/data/flat_test_data.jsonl
@@ -0,0 +1,3 @@
+{"query": "What is the capital of France?", "response": "Paris is the capital of France.", "ground_truth": "Paris"}
+{"query": "What is 2+2?", "response": "The answer is 4.", "ground_truth": "4"}
+{"query": "Who wrote Hamlet?", "response": "William Shakespeare wrote Hamlet.", "ground_truth": "Shakespeare"}
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/data/nested_test_data.jsonl b/sdk/evaluation/azure-ai-evaluation/tests/unittests/data/nested_test_data.jsonl
new file mode 100644
index 000000000000..17e785c16b5d
--- /dev/null
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/data/nested_test_data.jsonl
@@ -0,0 +1,3 @@
+{"item": {"query": "What security policies exist?", "context": {"company": {"policy": {"security": {"passwords": {"rotation_days": 90, "min_length": 12}, "network": {"vpn": {"required": true, "provider": "Cisco"}}}}}}, "response": "Password rotation is required every 90 days with minimum 12 characters. VPN is required using Cisco provider.", "ground_truth": "Security policies include password rotation every 90 days and VPN requirement."}}
+{"item": {"query": "What are the database settings?", "context": {"company": {"infrastructure": {"database": {"host": "db.example.com", "port": 5432, "type": "PostgreSQL"}}}}, "response": "The database is PostgreSQL hosted at db.example.com on port 5432.", "ground_truth": "PostgreSQL database on db.example.com:5432"}}
+{"item": {"query": "What is the deployment process?", "context": {"company": {"devops": {"deployment": {"strategy": "blue-green", "frequency": "daily", "tools": ["Jenkins", "Kubernetes"]}}}}, "response": "We use blue-green deployment strategy daily with Jenkins and Kubernetes.", "ground_truth": "Blue-green deployment daily using Jenkins and Kubernetes"}}
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/data/wrapped_flat_test_data.jsonl b/sdk/evaluation/azure-ai-evaluation/tests/unittests/data/wrapped_flat_test_data.jsonl
new file mode 100644
index 000000000000..195bd72ae4e7
--- /dev/null
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/data/wrapped_flat_test_data.jsonl
@@ -0,0 +1,2 @@
+{"item": {"query": "Simple query", "response": "Simple response", "ground_truth": "Simple truth"}}
+{"item": {"query": "Another query", "response": "Another response", "ground_truth": "Another truth"}}
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_aoai_data_source.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_aoai_data_source.py
new file mode 100644
index 000000000000..c74991160e9d
--- /dev/null
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_aoai_data_source.py
@@ -0,0 +1,510 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+
+import pytest
+import pandas as pd
+import os
+import pathlib
+from typing import Dict, Any
+
+from azure.ai.evaluation._evaluate._evaluate_aoai import (
+    _generate_data_source_config,
+    _get_data_source,
+    _build_schema_tree_from_paths,
+    WRAPPER_KEY,
+)
+
+
+def _get_file(name):
+    """Return the absolute path of ``name`` inside the ``data`` folder next to this test module."""
+    data_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data")
+    return os.path.join(data_path, name)
+
+
+@pytest.fixture
+def flat_test_data():
+    """Return a three-row DataFrame with plain string columns: query, response and ground_truth."""
+    return pd.DataFrame(
+        [
+            {
+                "query": "What is the capital of France?",
+                "response": "Paris is the capital of France.",
+                "ground_truth": "Paris",
+            },
+            {"query": "What is 2+2?", "response": "The answer is 4.", "ground_truth": "4"},
+            {
+                "query": "Who wrote Hamlet?",
+                "response": "William Shakespeare wrote Hamlet.",
+                "ground_truth": "Shakespeare",
+            },
+        ]
+    )
+
+
+@pytest.fixture
+def nested_test_data():
+    """Return a two-row DataFrame keyed by flattened dotted ``item.*`` paths; each row has its own context columns."""
+    return pd.DataFrame(
+        [
+            {
+                "item.query": "What security policies exist?",
+                "item.context.company.policy.security.passwords.rotation_days": "90",
+                "item.context.company.policy.security.network.vpn.required": "true",
+                "item.response": "Password rotation is required every 90 days.",
+                "item.ground_truth": "Security policies include password rotation.",
+            },
+            {
+                "item.query": "What are the database settings?",
+                "item.context.company.infrastructure.database.host": "db.example.com",
+                "item.context.company.infrastructure.database.port": "5432",
+                "item.response": "The database is PostgreSQL.",
+                "item.ground_truth": "PostgreSQL database",
+            },
+        ]
+    )
+
+
+@pytest.fixture
+def flat_test_data_file():
+    """Return the path of ``flat_test_data.jsonl`` as resolved by ``_get_file``."""
+    return _get_file("flat_test_data.jsonl")
+
+
+@pytest.fixture
+def nested_test_data_file():
+    """Return the path of ``nested_test_data.jsonl`` as resolved by ``_get_file``."""
+    return _get_file("nested_test_data.jsonl")
+
+
+@pytest.fixture
+def wrapped_flat_test_data_file():
+    """Return the path of ``wrapped_flat_test_data.jsonl`` as resolved by ``_get_file``."""
+    return _get_file("wrapped_flat_test_data.jsonl")
+
+
+@pytest.mark.unittest
+class TestBuildSchemaTreeFromPaths:
+    """Unit tests for ``_build_schema_tree_from_paths``, checked here on lists of dotted path strings."""
+
+    def test_single_level_paths(self):
+        """Dot-free paths become sibling string properties that are all listed in ``required``."""
+        paths = ["query", "response", "ground_truth"]
+        schema = _build_schema_tree_from_paths(paths, force_leaf_type="string")
+
+        assert schema["type"] == "object"
+        assert "properties" in schema
+        assert "required" in schema
+        assert set(schema["properties"].keys()) == {"query", "response", "ground_truth"}
+        assert all(prop["type"] == "string" for prop in schema["properties"].values())
+        assert set(schema["required"]) == {"query", "response", "ground_truth"}
+
+    def test_nested_paths(self):
+        """Dotted paths become nested ``object`` nodes whose leaves carry the forced leaf type."""
+        paths = [
+            "context.company.policy.security.passwords.rotation_days",
+            "context.company.policy.security.network.vpn.required",
+            "query",
+        ]
+        schema = _build_schema_tree_from_paths(paths, force_leaf_type="string")
+
+        assert schema["type"] == "object"
+        assert "context" in schema["properties"]
+        assert schema["properties"]["context"]["type"] == "object"
+
+        # Walk context -> company -> policy -> security; each intermediate node must be an object
+        company = schema["properties"]["context"]["properties"]["company"]
+        assert company["type"] == "object"
+
+        policy = company["properties"]["policy"]
+        assert policy["type"] == "object"
+
+        security = policy["properties"]["security"]
+        assert security["type"] == "object"
+
+        # Both branches under "security" must end in string leaves
+        passwords = security["properties"]["passwords"]
+        assert passwords["properties"]["rotation_days"]["type"] == "string"
+
+        network = security["properties"]["network"]
+        assert network["properties"]["vpn"]["properties"]["required"]["type"] == "string"
+
+        # Spot-check `required` at the root and on the first nested level only (deeper levels are not asserted)
+        assert "required" in schema
+        assert "required" in schema["properties"]["context"]
+
+    def test_empty_paths(self):
+        """An empty path list still yields a root schema of type ``object`` (nothing else is asserted)."""
+        paths = []
+        schema = _build_schema_tree_from_paths(paths, force_leaf_type="object")
+
+        assert schema["type"] == "object"
+
+    def test_mixed_depth_paths(self):
+        """Top-level leaves and nested branches of different depths can coexist in one schema."""
+        paths = ["simple_field", "nested.field.deep", "nested.field.shallow", "another.path"]
+        schema = _build_schema_tree_from_paths(paths, force_leaf_type="string")
+
+        assert "simple_field" in schema["properties"]
+        assert schema["properties"]["simple_field"]["type"] == "string"
+
+        assert "nested" in schema["properties"]
+        nested = schema["properties"]["nested"]
+        assert nested["type"] == "object"
+        assert "field" in nested["properties"]
+
+
+@pytest.mark.unittest
+class TestGenerateDataSourceConfig:
+    """Unit tests for ``_generate_data_source_config`` covering flat and nested column mappings."""
+
+    def test_flat_column_mapping(self, flat_test_data):
+        """Dot-free ``${data.*}`` references produce a flat object schema keyed by the mapping keys."""
+        column_mapping = {
+            "query": "${data.query}",
+            "response": "${data.response}",
+            "ground_truth": "${data.ground_truth}",
+        }
+
+        config = _generate_data_source_config(flat_test_data, column_mapping)
+
+        assert config["type"] == "custom"
+        assert "item_schema" in config
+        assert config["item_schema"]["type"] == "object"
+
+        properties = config["item_schema"]["properties"]
+        assert "query" in properties
+        assert "response" in properties
+        assert "ground_truth" in properties
+
+        # Flat mode declares every property as a string
+        assert properties["query"]["type"] == "string"
+        assert properties["response"]["type"] == "string"
+        assert properties["ground_truth"]["type"] == "string"
+
+    def test_nested_column_mapping_with_wrapper(self, nested_test_data):
+        """Dotted paths that all start with the wrapper segment produce a nested schema without it."""
+        column_mapping = {
+            "query": "${data.item.query}",
+            "passwords_rotation": "${data.item.context.company.policy.security.passwords.rotation_days}",
+            "vpn_required": "${data.item.context.company.policy.security.network.vpn.required}",
+            "response": "${data.item.response}",
+        }
+
+        config = _generate_data_source_config(nested_test_data, column_mapping)
+
+        assert config["type"] == "custom"
+        assert "item_schema" in config
+        schema = config["item_schema"]
+
+        # Should be nested object since paths contain dots
+        assert schema["type"] == "object"
+
+        # The leading "item" wrapper is stripped, so its children appear at the top of the schema
+        assert "query" in schema["properties"]
+        assert "response" in schema["properties"]
+        assert "context" in schema["properties"]
+
+        # The remaining path segments are kept as nested objects
+        context = schema["properties"]["context"]
+        assert context["type"] == "object"
+        assert "company" in context["properties"]
+
+    def test_nested_column_mapping_without_wrapper(self, nested_test_data):
+        """Dotted paths whose shared first segment is not the wrapper keep that segment in the schema."""
+        column_mapping = {
+            "query": "${data.custom.query}",
+            "field": "${data.custom.nested.field}",
+        }
+
+        config = _generate_data_source_config(nested_test_data, column_mapping)
+
+        assert config["type"] == "custom"
+        assert "item_schema" in config
+        schema = config["item_schema"]
+
+        # Should be nested
+        assert schema["type"] == "object"
+        # "custom" is not stripped here, so it stays as the top-level property
+        assert "custom" in schema["properties"]
+
+    def test_mixed_data_and_run_outputs(self, flat_test_data):
+        """Mixing ``${data.*}`` and ``${run.outputs.*}`` references without dots stays in flat mode."""
+        column_mapping = {
+            "query": "${data.query}",
+            "response": "${run.outputs.response}",
+            "ground_truth": "${data.ground_truth}",
+        }
+
+        config = _generate_data_source_config(flat_test_data, column_mapping)
+
+        # The data-backed keys must be present in the flat schema
+        properties = config["item_schema"]["properties"]
+        assert "query" in properties
+        assert "ground_truth" in properties
+        # NOTE: flat mode also adds "response" (every mapping key becomes a property); not asserted here
+
+    def test_empty_column_mapping(self, flat_test_data):
+        """An empty mapping yields a flat object schema whose ``properties`` dict is empty."""
+        column_mapping = {}
+
+        config = _generate_data_source_config(flat_test_data, column_mapping)
+
+        # Should return flat schema with no properties
+        assert config["type"] == "custom"
+        assert config["item_schema"]["type"] == "object"
+        assert config["item_schema"]["properties"] == {}
+
+    def test_no_data_references(self, flat_test_data):
+        """A mapping with only run.outputs references still gets one flat property per mapping key."""
+        column_mapping = {"response": "${run.outputs.response}", "result": "${run.outputs.result}"}
+
+        config = _generate_data_source_config(flat_test_data, column_mapping)
+
+        # No data paths are referenced, so the flat branch is used and every mapping key becomes a property
+        assert config["type"] == "custom"
+        assert "response" in config["item_schema"]["properties"]
+        assert "result" in config["item_schema"]["properties"]
+
+    def test_single_nested_path(self, flat_test_data):
+        """A single dotted data path is enough to switch schema generation to nested mode."""
+        column_mapping = {"nested_field": "${data.item.context.field}"}
+
+        config = _generate_data_source_config(flat_test_data, column_mapping)
+
+        # Should generate nested schema
+        assert config["type"] == "custom"
+        schema = config["item_schema"]
+        assert schema["type"] == "object"
+        # With the leading "item" wrapper stripped, "context" is the top-level property
+        assert "context" in schema["properties"]
+
+
+@pytest.mark.unittest
+class TestGetDataSource:
+    """Test suite for the _get_data_source function."""
+
+    def test_flat_data_source_generation(self, flat_test_data):
+        """Flat mappings yield a jsonl/file_content source with one wrapped item per DataFrame row."""
+        column_mapping = {
+            "query": "${data.query}",
+            "response": "${data.response}",
+            "ground_truth": "${data.ground_truth}",
+        }
+
+        data_source = _get_data_source(flat_test_data, column_mapping)
+
+        assert data_source["type"] == "jsonl"
+        assert "source" in data_source
+        assert data_source["source"]["type"] == "file_content"
+
+        content = data_source["source"]["content"]
+        assert len(content) == 3
+
+        # Every row is nested under WRAPPER_KEY and carries all three mapped fields
+        for item in content:
+            assert WRAPPER_KEY in item
+            assert "query" in item[WRAPPER_KEY]
+            assert "response" in item[WRAPPER_KEY]
+            assert "ground_truth" in item[WRAPPER_KEY]
+
+    def test_nested_data_source_generation(self, nested_test_data):
+        """Dotted ``${data.item.*}`` mappings are expanded into nested dicts with string leaf values."""
+        column_mapping = {
+            "query": "${data.item.query}",
+            "rotation_days": "${data.item.context.company.policy.security.passwords.rotation_days}",
+            "vpn_required": "${data.item.context.company.policy.security.network.vpn.required}",
+            "response": "${data.item.response}",
+        }
+
+        data_source = _get_data_source(nested_test_data, column_mapping)
+
+        assert data_source["type"] == "jsonl"
+        content = data_source["source"]["content"]
+        assert len(content) == 2
+
+        # Dotted paths are expanded into nested dicts beneath the wrapper key
+        first_item = content[0][WRAPPER_KEY]
+        assert "query" in first_item
+        assert "context" in first_item
+        assert "company" in first_item["context"]
+        assert "policy" in first_item["context"]["company"]
+
+        # Leaf values are the first row's cell values, as strings
+        passwords = first_item["context"]["company"]["policy"]["security"]["passwords"]
+        assert passwords["rotation_days"] == "90"
+
+        vpn = first_item["context"]["company"]["policy"]["security"]["network"]["vpn"]
+        assert vpn["required"] == "true"
+
+    def test_data_source_with_run_outputs(self, flat_test_data):
+        """Test data source generation with run.outputs mappings."""
+        # Add __outputs column to simulate target function output
+        flat_test_data["__outputs.model_response"] = [
+            "Generated response 1",
+            "Generated response 2",
+            "Generated response 3",
+        ]
+
+        column_mapping = {
+            "query": "${data.query}",
+            "response": "${run.outputs.model_response}",
+            "ground_truth": "${data.ground_truth}",
+        }
+
+        data_source = _get_data_source(flat_test_data, column_mapping)
+
+        content = data_source["source"]["content"]
+
+        # run.outputs should be mapped with just leaf name
+        for i, item in enumerate(content):
+            assert "model_response" in item[WRAPPER_KEY]
+            assert item[WRAPPER_KEY]["model_response"] == f"Generated response {i+1}"
+
+    def test_data_source_with_unmapped_columns(self, flat_test_data):
+        """Test that unmapped columns are included in output."""
+        # Add extra column not in mapping
+        flat_test_data["extra_field"] = ["extra1", "extra2", "extra3"]
+
+        column_mapping = {"query": "${data.query}", "response": "${data.response}"}
+
+        data_source = _get_data_source(flat_test_data, column_mapping)
+
+        content = data_source["source"]["content"]
+
+        # Unmapped columns should appear directly in item
+        for i, item in enumerate(content):
+            assert "extra_field" in item[WRAPPER_KEY]
+            assert "ground_truth" in item[WRAPPER_KEY]  # Also unmapped
+
+    def test_data_source_with_none_values(self, flat_test_data):
+        """Test data source generation handles None values correctly."""
+        flat_test_data.loc[1, "response"] = None
+
+        column_mapping = {
+            "query": "${data.query}",
+            "response": "${data.response}",
+            "ground_truth": "${data.ground_truth}",
+        }
+
+        data_source = _get_data_source(flat_test_data, column_mapping)
+
+        content = data_source["source"]["content"]
+
+        # None should be converted to empty string
+        assert content[1][WRAPPER_KEY]["response"] == ""
+
+    def test_data_source_with_numeric_values(self, flat_test_data):
+        """Test data source generation converts numeric values to strings."""
+        flat_test_data["score"] = [95, 87, 92]
+        flat_test_data["confidence"] = [0.95, 0.87, 0.92]
+
+        column_mapping = {"query": "${data.query}", "score": "${data.score}", "confidence": "${data.confidence}"}
+
+        data_source = _get_data_source(flat_test_data, column_mapping)
+
+        content = data_source["source"]["content"]
+
+        # Numeric values should be converted to strings
+        assert content[0][WRAPPER_KEY]["score"] == "95"
+        assert content[0][WRAPPER_KEY]["confidence"] == "0.95"
+        assert isinstance(content[0][WRAPPER_KEY]["score"], str)
+        assert isinstance(content[0][WRAPPER_KEY]["confidence"], str)
+
+    def test_empty_dataframe(self):
+        """Test data source generation with empty dataframe."""
+        empty_df = pd.DataFrame()
+        column_mapping = {"query": "${data.query}"}
+
+        data_source = _get_data_source(empty_df, column_mapping)
+
+        assert data_source["type"] == "jsonl"
+        assert len(data_source["source"]["content"]) == 0
+
+    def test_complex_nested_structure(self):
+        """Test with complex multi-level nested structure."""
+        df = pd.DataFrame(
+            [
+                {
+                    "item.a.b.c.d": "deep_value",
+                    "item.a.b.x": "mid_value",
+                    "item.a.y": "shallow_value",
+                    "item.z": "top_value",
+                }
+            ]
+        )
+
+        column_mapping = {
+            "deep": "${data.item.a.b.c.d}",
+            "mid": "${data.item.a.b.x}",
+            "shallow": "${data.item.a.y}",
+            "top": "${data.item.z}",
+        }
+
+        data_source = _get_data_source(df, column_mapping)
+
+        content = data_source["source"]["content"]
+        item = content[0][WRAPPER_KEY]
+
+        # Verify nested structure
+        assert item["a"]["b"]["c"]["d"] == "deep_value"
+        assert item["a"]["b"]["x"] == "mid_value"
+        assert item["a"]["y"] == "shallow_value"
+        assert item["z"] == "top_value"
+
+    def test_data_source_preserves_row_order(self, flat_test_data):
+        """Test that data source preserves the order of rows."""
+        column_mapping = {"query": "${data.query}", "response": "${data.response}"}
+
+        data_source = _get_data_source(flat_test_data, column_mapping)
+        content = data_source["source"]["content"]
+
+        # Verify order matches input
+        assert content[0][WRAPPER_KEY]["query"] == flat_test_data.iloc[0]["query"]
+        assert content[1][WRAPPER_KEY]["query"] == flat_test_data.iloc[1]["query"]
+        assert content[2][WRAPPER_KEY]["query"] == flat_test_data.iloc[2]["query"]
+
+
+@pytest.mark.unittest
+class TestDataSourceConfigIntegration:
+    """Cross-checks between the generated item schema and the generated data payload."""
+
+    def test_flat_schema_and_data_alignment(self, flat_test_data):
+        """Every property declared by the flat schema is present in the first wrapped data row."""
+        mapping = {
+            "query": "${data.query}",
+            "response": "${data.response}",
+            "ground_truth": "${data.ground_truth}",
+        }
+
+        schema_config = _generate_data_source_config(flat_test_data, mapping)
+        payload = _get_data_source(flat_test_data, mapping)
+
+        declared_props = schema_config["item_schema"]["properties"]
+        first_row = payload["source"]["content"][0][WRAPPER_KEY]
+
+        # Collect declared properties the data row lacks; none are allowed.
+        missing = [name for name in declared_props if name not in first_row]
+        assert not missing
+
+    def test_nested_schema_and_data_alignment(self, nested_test_data):
+        """For nested mappings the schema is an object and the data row carries the nested path."""
+        mapping = {
+            "query": "${data.item.query}",
+            "rotation_days": "${data.item.context.company.policy.security.passwords.rotation_days}",
+            "response": "${data.item.response}",
+        }
+
+        schema_config = _generate_data_source_config(nested_test_data, mapping)
+        payload = _get_data_source(nested_test_data, mapping)
+
+        # Schema side is an object; data side is wrapped.
+        assert schema_config["item_schema"]["type"] == "object"
+        first_row = payload["source"]["content"][0]
+        assert WRAPPER_KEY in first_row
+
+        # The upper levels of the mapped path must be present in the unwrapped row.
+        unwrapped = first_row[WRAPPER_KEY]
+        assert all(key in unwrapped for key in ("query", "context"))
+        assert "company" in unwrapped["context"]
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_aoai_nested_integration.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_aoai_nested_integration.py
new file mode 100644
index 000000000000..8bfbdf1edad0
--- /dev/null
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_aoai_nested_integration.py
@@ -0,0 +1,289 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+
+import pytest
+import pandas as pd
+from unittest.mock import Mock, patch, MagicMock
+from typing import Dict, Any
+
+from azure.ai.evaluation._evaluate._evaluate_aoai import (
+    _generate_data_source_config,
+    _get_data_source,
+    _begin_eval_run,
+    WRAPPER_KEY,
+)
+
+
+@pytest.mark.unittest
+class TestAOAINestedDataIntegration:
+    """Checks that AOAI run creation, schema config, and data payloads agree for flat and nested inputs."""
+
+    def test_aoai_eval_run_with_flat_data(self):
+        """``_begin_eval_run`` hands the client a wrapped jsonl payload built from flat columns."""
+        # Two flat rows; every column is referenced by the mapping below.
+        frame = pd.DataFrame(
+            [
+                {"query": "What is AI?", "response": "AI is...", "ground_truth": "AI"},
+                {"query": "What is ML?", "response": "ML is...", "ground_truth": "ML"},
+            ]
+        )
+
+        mapping = {
+            "query": "${data.query}",
+            "response": "${data.response}",
+            "ground_truth": "${data.ground_truth}",
+        }
+
+        # Stand-in client whose evals.runs.create returns a run with a known id.
+        client_stub = Mock()
+        created_run = Mock()
+        created_run.id = "test-run-123"
+        client_stub.evals.runs.create.return_value = created_run
+
+        # Exercise the function under test.
+        returned_id = _begin_eval_run(
+            client=client_stub,
+            eval_group_id="test-group-456",
+            run_name="test-run",
+            input_data_df=frame,
+            column_mapping=mapping,
+        )
+
+        # The returned id is the created run's id, and create was invoked exactly once.
+        assert returned_id == "test-run-123"
+        client_stub.evals.runs.create.assert_called_once()
+
+        # call_args unpacks to (positional args, keyword args); only the keywords matter here.
+        _, sent_kwargs = client_stub.evals.runs.create.call_args
+
+        # Group id and run name are passed through unchanged.
+        assert sent_kwargs["eval_id"] == "test-group-456"
+        assert sent_kwargs["name"] == "test-run"
+
+        # Envelope of the data source.
+        payload = sent_kwargs["data_source"]
+        assert payload["type"] == "jsonl"
+        assert "source" in payload
+        assert payload["source"]["type"] == "file_content"
+
+        # One content entry per input row.
+        rows = payload["source"]["content"]
+        assert len(rows) == 2
+
+        # Every entry nests its fields under WRAPPER_KEY.
+        for wrapped in rows:
+            assert WRAPPER_KEY in wrapped
+            assert "query" in wrapped[WRAPPER_KEY]
+            assert "response" in wrapped[WRAPPER_KEY]
+            assert "ground_truth" in wrapped[WRAPPER_KEY]
+
+    def test_aoai_eval_run_with_nested_data(self):
+        """``_begin_eval_run`` expands dotted column names into nested dicts in the payload."""
+        # A single row whose column names encode the nesting with dots.
+        frame = pd.DataFrame(
+            [
+                {
+                    "item.query": "Security question",
+                    "item.context.company.policy.security.passwords.rotation_days": "90",
+                    "item.context.company.policy.security.network.vpn.required": "true",
+                    "item.response": "Password rotation is 90 days.",
+                    "item.ground_truth": "90",
+                }
+            ]
+        )
+
+        mapping = {
+            "query": "${data.item.query}",
+            "rotation_days": "${data.item.context.company.policy.security.passwords.rotation_days}",
+            "vpn_required": "${data.item.context.company.policy.security.network.vpn.required}",
+            "response": "${data.item.response}",
+            "ground_truth": "${data.item.ground_truth}",
+        }
+
+        # Stand-in client whose evals.runs.create returns a run with a known id.
+        client_stub = Mock()
+        created_run = Mock()
+        created_run.id = "nested-run-789"
+        client_stub.evals.runs.create.return_value = created_run
+
+        # Exercise the function under test.
+        returned_id = _begin_eval_run(
+            client=client_stub,
+            eval_group_id="nested-group-101",
+            run_name="nested-test-run",
+            input_data_df=frame,
+            column_mapping=mapping,
+        )
+
+        # The returned id is the created run's id, and create was invoked exactly once.
+        assert returned_id == "nested-run-789"
+        client_stub.evals.runs.create.assert_called_once()
+
+        # Pull the payload out of the keyword arguments given to create.
+        _, sent_kwargs = client_stub.evals.runs.create.call_args
+        payload = sent_kwargs["data_source"]
+        rows = payload["source"]["content"]
+
+        # One row in, one wrapped entry out.
+        assert len(rows) == 1
+        unwrapped = rows[0][WRAPPER_KEY]
+
+        # Top-level field first, then the deep path.
+        assert "query" in unwrapped
+        # Walk the path one key at a time so a failure pinpoints the missing level.
+        nested_path = ("context", "company", "policy", "security", "passwords", "rotation_days")
+        cursor = unwrapped
+        for key in nested_path:
+            assert key in cursor
+            cursor = cursor[key]
+        assert cursor == "90"
+
+    def test_data_source_config_matches_data_source_for_nested(self):
+        """Schema config and data payload expose the same nested layout for dotted columns."""
+        frame = pd.DataFrame(
+            [
+                {
+                    "item.query": "Test query",
+                    "item.context.field1": "value1",
+                    "item.context.field2": "value2",
+                    "item.response": "Test response",
+                }
+            ]
+        )
+
+        mapping = {
+            "query": "${data.item.query}",
+            "field1": "${data.item.context.field1}",
+            "field2": "${data.item.context.field2}",
+            "response": "${data.item.response}",
+        }
+
+        # Build the schema and the payload from identical inputs.
+        schema_config = _generate_data_source_config(frame, mapping)
+        payload = _get_data_source(frame, mapping)
+
+        # Envelope of the schema config.
+        assert schema_config["type"] == "custom"
+        item_schema = schema_config["item_schema"]
+        assert item_schema["type"] == "object"
+
+        # Properties are listed without the wrapper level, and "context" is itself an object.
+        assert "query" in item_schema["properties"]
+        assert "context" in item_schema["properties"]
+        assert item_schema["properties"]["context"]["type"] == "object"
+
+        # Now the data side: first wrapped entry.
+        rows = payload["source"]["content"]
+        unwrapped = rows[0][WRAPPER_KEY]
+
+        # The same names must be reachable in the data.
+        assert "query" in unwrapped
+        assert "context" in unwrapped
+        assert "field1" in unwrapped["context"]
+        assert "field2" in unwrapped["context"]
+        assert "response" in unwrapped
+
+    def test_data_source_config_matches_data_source_for_flat(self):
+        """Schema config and data payload expose the same keys for flat columns."""
+        frame = pd.DataFrame([{"query": "Test", "response": "Answer", "score": "5"}])
+
+        mapping = {"query": "${data.query}", "response": "${data.response}", "score": "${data.score}"}
+
+        # Build the schema and the payload from identical inputs.
+        schema_config = _generate_data_source_config(frame, mapping)
+        payload = _get_data_source(frame, mapping)
+
+        # Envelope of the schema config.
+        assert schema_config["type"] == "custom"
+        item_schema = schema_config["item_schema"]
+        assert item_schema["type"] == "object"
+
+        # With flat input the property names are exactly the mapping keys.
+        assert set(item_schema["properties"]) == {"query", "response", "score"}
+
+        # Now the data side: first wrapped entry.
+        rows = payload["source"]["content"]
+        unwrapped = rows[0][WRAPPER_KEY]
+
+        # Each mapped key is present in the data.
+        assert "query" in unwrapped
+        assert "response" in unwrapped
+        assert "score" in unwrapped
+
+    def test_data_source_with_run_outputs_and_nested_data(self):
+        """Nested ``data`` paths and ``run.outputs`` references can be combined in one mapping."""
+        frame = pd.DataFrame(
+            [
+                {
+                    "item.query": "Test query",
+                    "item.context.metadata.id": "123",
+                    "__outputs.generated_response": "Generated text",
+                }
+            ]
+        )
+
+        mapping = {
+            "query": "${data.item.query}",
+            "metadata_id": "${data.item.context.metadata.id}",
+            "response": "${run.outputs.generated_response}",
+        }
+
+        # Only the payload is needed for this check.
+        payload = _get_data_source(frame, mapping)
+
+        # First wrapped entry.
+        rows = payload["source"]["content"]
+        unwrapped = rows[0][WRAPPER_KEY]
+
+        # Dotted data columns become nested containers.
+        assert "query" in unwrapped
+        assert "context" in unwrapped
+        assert "metadata" in unwrapped["context"]
+        assert unwrapped["context"]["metadata"]["id"] == "123"
+
+        # The run output is stored at the top level under its leaf name.
+        assert "generated_response" in unwrapped
+        assert unwrapped["generated_response"] == "Generated text"
+
+    def test_complex_nested_structure_multiple_branches(self):
+        """Sibling branches under the same parent are kept side by side in schema and data."""
+        frame = pd.DataFrame(
+            [
+                {
+                    "item.user.name": "Alice",
+                    "item.user.email": "alice@example.com",
+                    "item.system.version": "1.0",
+                    "item.system.region": "us-east",
+                    "item.query": "Test",
+                }
+            ]
+        )
+
+        mapping = {
+            "name": "${data.item.user.name}",
+            "email": "${data.item.user.email}",
+            "version": "${data.item.system.version}",
+            "region": "${data.item.system.region}",
+            "query": "${data.item.query}",
+        }
+
+        # Build the schema and the payload from identical inputs.
+        schema_config = _generate_data_source_config(frame, mapping)
+        payload = _get_data_source(frame, mapping)
+
+        # Both branches plus the plain field are declared in the schema.
+        item_schema = schema_config["item_schema"]
+        assert "user" in item_schema["properties"]
+        assert "system" in item_schema["properties"]
+        assert "query" in item_schema["properties"]
+
+        # Both branches carry their leaf values in the data.
+        unwrapped = payload["source"]["content"][0][WRAPPER_KEY]
+        assert "user" in unwrapped
+        assert "system" in unwrapped
+        assert unwrapped["user"]["name"] == "Alice"
+        assert unwrapped["user"]["email"] == "alice@example.com"
+        assert unwrapped["system"]["version"] == "1.0"
+        assert unwrapped["system"]["region"] == "us-east"