azure-sdk · chienyuanchang · Dec 8, 2025 · Dec 8, 2025 · yungshinlintw · Dec 8, 2025
diff --git a/...ure-ai-contentunderstanding/samples/async_samples/sample_analyze_return_raw_json_async.py b/...ure-ai-contentunderstanding/samples/async_samples/sample_analyze_return_raw_json_async.py
@@ -9,14 +9,18 @@
 
 DESCRIPTION:
     This sample demonstrates how to access the raw JSON response from analysis operations
-    using protocol methods. This is useful for advanced scenarios where you need direct access
-    to the JSON structure.
+    using the 'cls' callback parameter (async version). This is useful for advanced scenarios
+    where you need direct access to the JSON structure.
 
     The Content Understanding SDK provides two approaches for accessing analysis results:
+
     1. Object model approach (recommended): Returns strongly-typed AnalyzeResult objects
-    2. Protocol method approach: Returns raw BinaryData containing the JSON response
+       that are easier to navigate and use. This is shown in sample_analyze_binary_async.py.
+
+    2. Raw response approach ('cls' callback): Returns the raw HTTP response containing the
+       JSON. This sample demonstrates this approach for advanced scenarios.
 
-    For production use, prefer the object model approach as it provides:
+    IMPORTANT: For production use, prefer the object model approach as it provides:
     - Type safety
     - IntelliSense support
     - Easier navigation of results
@@ -66,21 +70,31 @@ async def main() -> None:
 
         print(f"Analyzing {file_path} with prebuilt-documentSearch...")
 
-        # Use the standard method which returns an AnalyzeResult
-        # Then serialize to JSON for raw access
+        # Use the 'cls' callback parameter to get the raw HTTP response
+        # The 'cls' parameter allows us to intercept the response and return custom data
+        # We return a tuple: (deserialized_object, raw_http_response)
+        # Note: For production use, prefer the object model approach (without cls parameter)
+        #       which returns AnalyzeResult objects that are easier to work with
         poller = await client.begin_analyze_binary(
             analyzer_id="prebuilt-documentSearch",
             binary_input=file_bytes,
+            content_type="application/pdf",
+            cls=lambda pipeline_response, deserialized_obj, response_headers: (
+                deserialized_obj,
+                pipeline_response.http_response,
+            ),
         )
-        result = await poller.result()
 
-        # Convert to dictionary and then to JSON
-        result_dict = result.as_dict()
+        # Wait for completion and get both the deserialized object and raw HTTP response
+        _, raw_http_response = await poller.result()
         # [END analyze_return_raw_json]
 
         # [START parse_raw_json]
+        # Parse the raw JSON response
+        response_json = raw_http_response.json()
+
         # Pretty-print the JSON
-        pretty_json = json.dumps(result_dict, indent=2, ensure_ascii=False, default=str)
+        pretty_json = json.dumps(response_json, indent=2, ensure_ascii=False)
 
         # Create output directory if it doesn't exist
         output_dir = Path(__file__).parent.parent / "sample_output"
@@ -94,17 +108,26 @@ async def main() -> None:
         with open(output_path, "w", encoding="utf-8") as f:
             f.write(pretty_json)
 
-        print(f"\nRaw JSON response saved to: {output_path}")
+        print(f"Raw JSON response saved to: {output_path}")
         print(f"File size: {len(pretty_json):,} characters")
-
-        # Show a preview of the JSON structure
-        print("\nJSON Structure Preview:")
-        print("=" * 50)
-        preview = pretty_json[:2000] + "..." if len(pretty_json) > 2000 else pretty_json
-        print(preview)
-        print("=" * 50)
         # [END parse_raw_json]
 
+        # [START extract_from_raw_json]
+        # Extract key information from raw JSON
+        if "result" in response_json:
+            result_data = response_json["result"]
+            if "analyzerId" in result_data:
+                print(f"Analyzer ID: {result_data['analyzerId']}")
+            if "contents" in result_data and isinstance(result_data["contents"], list):
+                print(f"Contents count: {len(result_data['contents'])}")
+                if len(result_data["contents"]) > 0:
+                    first_content = result_data["contents"][0]
+                    if "kind" in first_content:
+                        print(f"Content kind: {first_content['kind']}")
+                    if "mimeType" in first_content:
+                        print(f"MIME type: {first_content['mimeType']}")
+        # [END extract_from_raw_json]
+
     if not isinstance(credential, AzureKeyCredential):
         await credential.close()
 

diff --git a/...tentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_return_raw_json.py b/...tentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_return_raw_json.py
@@ -9,14 +9,18 @@
 
 DESCRIPTION:
     This sample demonstrates how to access the raw JSON response from analysis operations
-    using protocol methods. This is useful for advanced scenarios where you need direct access
-    to the JSON structure.
+    using the 'cls' callback parameter. This is useful for advanced scenarios where you need
+    direct access to the JSON structure.
 
     The Content Understanding SDK provides two approaches for accessing analysis results:
+
     1. Object model approach (recommended): Returns strongly-typed AnalyzeResult objects
-    2. Protocol method approach: Returns raw BinaryData containing the JSON response
+       that are easier to navigate and use. This is shown in sample_analyze_binary.py.
+
+    2. Raw response approach ('cls' callback): Returns the raw HTTP response containing the
+       JSON. This sample demonstrates this approach for advanced scenarios.
 
-    For production use, prefer the object model approach as it provides:
+    IMPORTANT: For production use, prefer the object model approach as it provides:
     - Type safety
     - IntelliSense support
     - Easier navigation of results
@@ -66,21 +70,31 @@ def main() -> None:
 
     print(f"Analyzing {file_path} with prebuilt-documentSearch...")
 
-    # Use the standard method which returns an AnalyzeResult
-    # Then serialize to JSON for raw access
+    # Use the 'cls' callback parameter to get the raw HTTP response
+    # The 'cls' parameter allows us to intercept the response and return custom data
+    # We return a tuple: (deserialized_object, raw_http_response)
+    # Note: For production use, prefer the object model approach (without cls parameter)
+    #       which returns AnalyzeResult objects that are easier to work with
     poller = client.begin_analyze_binary(
         analyzer_id="prebuilt-documentSearch",
         binary_input=file_bytes,
+        content_type="application/pdf",
+        cls=lambda pipeline_response, deserialized_obj, response_headers: (
+            deserialized_obj,
+            pipeline_response.http_response,
+        ),
     )
-    result = poller.result()
 
-    # Convert to dictionary and then to JSON
-    result_dict = result.as_dict()
+    # Wait for completion and get both the deserialized object and raw HTTP response
+    _, raw_http_response = poller.result()
     # [END analyze_return_raw_json]
 
     # [START parse_raw_json]
+    # Parse the raw JSON response
+    response_json = raw_http_response.json()
+
     # Pretty-print the JSON
-    pretty_json = json.dumps(result_dict, indent=2, ensure_ascii=False, default=str)
+    pretty_json = json.dumps(response_json, indent=2, ensure_ascii=False)
 
     # Create output directory if it doesn't exist
     output_dir = Path(__file__).parent / "sample_output"
@@ -94,17 +108,26 @@ def main() -> None:
     with open(output_path, "w", encoding="utf-8") as f:
         f.write(pretty_json)
 
-    print(f"\nRaw JSON response saved to: {output_path}")
+    print(f"Raw JSON response saved to: {output_path}")
     print(f"File size: {len(pretty_json):,} characters")
-
-    # Show a preview of the JSON structure
-    print("\nJSON Structure Preview:")
-    print("=" * 50)
-    preview = pretty_json[:2000] + "..." if len(pretty_json) > 2000 else pretty_json
-    print(preview)
-    print("=" * 50)
     # [END parse_raw_json]
 
+    # [START extract_from_raw_json]
+    # Extract key information from raw JSON
+    if "result" in response_json:
+        result_data = response_json["result"]
+        if "analyzerId" in result_data:
+            print(f"Analyzer ID: {result_data['analyzerId']}")
+        if "contents" in result_data and isinstance(result_data["contents"], list):
+            print(f"Contents count: {len(result_data['contents'])}")
+            if len(result_data["contents"]) > 0:
+                first_content = result_data["contents"][0]
+                if "kind" in first_content:
+                    print(f"Content kind: {first_content['kind']}")
+                if "mimeType" in first_content:
+                    print(f"MIME type: {first_content['mimeType']}")
+    # [END extract_from_raw_json]
+
 
 if __name__ == "__main__":
     main()
diff --git a/...anding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_return_raw_json.py b/...anding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_return_raw_json.py
@@ -18,7 +18,6 @@
 
 import os
 import json
-import pytest
 from devtools_testutils import recorded_by_proxy
 from testpreparer import ContentUnderstandingPreparer, ContentUnderstandingClientTestBase
 
@@ -32,11 +31,11 @@ def test_sample_analyze_return_raw_json(self, azure_content_understanding_endpoi
         """Test analyzing a document and getting raw JSON response.
 
         This test validates:
-        1. Document analysis using protocol method
+        1. Document analysis using 'cls' callback to get raw HTTP response
         2. Raw JSON response format
         3. JSON structure validation
 
-        11_AnalyzeReturnRawJson.AnalyzeReturnRawJsonAsync()
+        11_AnalyzeReturnRawJson.AnalyzeReturnRawJson()
         """
         client = self.create_client(endpoint=azure_content_understanding_endpoint)
 
@@ -55,80 +54,64 @@ def test_sample_analyze_return_raw_json(self, azure_content_understanding_endpoi
         assert len(file_bytes) > 0, "File should not be empty"
         print(f"[PASS] File loaded: {len(file_bytes)} bytes")
 
-        # Analyze the document and get raw response
-        # Note: The Python SDK returns structured objects by default
-        # We can access the raw response through the result
+        # Use 'cls' callback to get raw HTTP response
+        # The 'cls' callback receives the pipeline response together with the already-deserialized model
+        # We return a tuple: (deserialized_object, raw_http_response)
         poller = client.begin_analyze_binary(
             analyzer_id="prebuilt-documentSearch",
             binary_input=file_bytes,
-            content_type="application/pdf"
+            content_type="application/pdf",
+            cls=lambda pipeline_response, deserialized_obj, response_headers: (
+                deserialized_obj,
+                pipeline_response.http_response,
+            ),
         )
 
-        result = poller.result()
+        # Wait for completion and get both model and raw HTTP response
+        _, raw_http_response = poller.result()
 
         # Assertion: Verify analysis operation completed
         assert poller is not None, "Analysis operation should not be null"
         assert poller.done(), "Operation should be completed"
-
-        # Verify raw response status
-        if hasattr(poller, '_polling_method'):
-            polling_method = getattr(poller, '_polling_method', None)
-            if polling_method and hasattr(polling_method, '_initial_response'):
-                raw_response = getattr(polling_method, '_initial_response', None)  # type: ignore
-                if raw_response:
-                    if hasattr(raw_response, 'http_response'):
-                        status = raw_response.http_response.status_code
-                    elif hasattr(raw_response, 'status_code'):
-                        status = raw_response.status_code
-                    else:
-                        status = None
-
-                    if status:
-                        assert status >= 200 and status < 300, \
-                            f"Response status should be successful (200-299), but was {status}"
-                        print(f"[PASS] Raw response status verified: {status}")
-
         assert poller.status() == "Succeeded", f"Operation status should be Succeeded, but was {poller.status()}"
         print("[PASS] Analysis operation completed successfully")
 
-        # Assertion: Verify result
-        assert result is not None, "Analysis result should not be null"
-        print("[PASS] Response data is not null")
-
-        # Convert result to JSON string to verify raw format capability
-        # In Python SDK, we can serialize the result to JSON
-        try:
-            # Try to access the raw response data
-            if hasattr(result, '__dict__'):
-                result_dict = result.__dict__
-                json_str = json.dumps(result_dict, default=str)
-                assert json_str is not None, "Response string should not be null"
-                assert len(json_str) > 0, "Response string should not be empty"
-                print(f"[PASS] Response converted to JSON string: {len(json_str)} characters")
-
-                # Verify it's valid JSON
-                parsed_json = json.loads(json_str)
-                assert parsed_json is not None, "Response should be valid JSON"
-                print("[PASS] Response is valid JSON format")
-            else:
-                print("[INFO] Result does not have __dict__ attribute, using alternative method")
-
-                # Alternative: Check if result has contents (which confirms it's a valid response)
-                assert hasattr(result, "contents"), "Result should have contents attribute"
-                assert result.contents is not None, "Result contents should not be null"
-                print("[PASS] Response data structure verified")
-
-        except json.JSONDecodeError as e:
-            pytest.fail(f"Response should be valid JSON format: {str(e)}")
-        except Exception as e:
-            print(f"[WARN] Could not serialize to JSON: {str(e)}")
-            # Still verify basic structure
-            assert result is not None, "Result should not be null"
-            print("[PASS] Response data verified (structured format)")
-
-        # Verify the response contains expected data
-        assert hasattr(result, "contents"), "Result should have contents"
-        if result.contents and len(result.contents) > 0:
-            print(f"[PASS] Response contains {len(result.contents)} content(s)")
+        # Assertion: Verify raw HTTP response
+        assert raw_http_response is not None, "Raw HTTP response should not be null"
+        print("[PASS] Raw HTTP response is not null")
+
+        # Parse the raw JSON response
+        response_json = raw_http_response.json()
+
+        # Assertion: Verify the parsed JSON is not None
+        assert response_json is not None, "Response JSON should not be null"
+        print("[PASS] Response JSON parsed successfully")
+
+        # Verify it's valid JSON by serializing
+        json_str = json.dumps(response_json, indent=2, ensure_ascii=False)
+        assert json_str is not None, "Response string should not be null"
+        assert len(json_str) > 0, "Response string should not be empty"
+        print(f"[PASS] Response converted to JSON string: {len(json_str)} characters")
+
+        # Verify the response contains expected structure (matching C# sample validation)
+        assert "result" in response_json, "Response should contain 'result' key"
+        result_data = response_json["result"]
+        print("[PASS] Response contains 'result' key")
+
+        # Report analyzerId when present (optional field; not asserted)
+        if "analyzerId" in result_data:
+            print(f"[PASS] Analyzer ID: {result_data['analyzerId']}")
+
+        # Report contents details when present (optional field; not asserted)
+        if "contents" in result_data and isinstance(result_data["contents"], list):
+            contents_count = len(result_data["contents"])
+            print(f"[PASS] Contents count: {contents_count}")
+
+            if contents_count > 0:
+                first_content = result_data["contents"][0]
+                if "kind" in first_content:
+                    print(f"[PASS] Content kind: {first_content['kind']}")
+                if "mimeType" in first_content:
+                    print(f"[PASS] MIME type: {first_content['mimeType']}")
 
         print("\n[SUCCESS] All test_sample_analyze_return_raw_json assertions passed")