Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,23 @@ async def aset_input_attributes(span, kwargs):
span, f"{prefix}.input_schema", json.dumps(input_schema)
)

output_format = kwargs.get("output_format")
if output_format and isinstance(output_format, dict):
if output_format.get("type") == "json_schema" and output_format.get("schema"):
set_span_attribute(
span,
SpanAttributes.LLM_REQUEST_STRUCTURED_OUTPUT_SCHEMA,
json.dumps(output_format.get("schema")),
)
elif output_format.get("type") == "json" and output_format.get("json_schema"):
schema = output_format.get("json_schema", {}).get("schema")
if schema:
set_span_attribute(
span,
SpanAttributes.LLM_REQUEST_STRUCTURED_OUTPUT_SCHEMA,
json.dumps(schema),
)


async def _aset_span_completions(span, response):
if not should_send_prompts():
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
import json

import pytest
from opentelemetry.semconv._incubating.attributes import (
gen_ai_attributes as GenAIAttributes,
)
from opentelemetry.semconv_ai import SpanAttributes

from .utils import verify_metrics

# NOTE: These tests require anthropic SDK >= 0.50.0, which supports structured outputs.
# The feature was announced in November 2025, but the currently installed SDK version
# (0.49.0) does not yet support the `output_format` parameter.
# The tests are kept here, skipped, until the SDK is updated.


# JSON Schema the structured reply is expected to follow: an object with a
# string `joke` and an integer `rating`, both required, no extra keys.
JOKE_SCHEMA = {
    "type": "object",
    "properties": {
        "joke": {"type": "string", "description": "A joke about OpenTelemetry"},
        "rating": {
            "type": "integer",
            "description": "Rating of the joke from 1 to 10",
        },
    },
    "required": ["joke", "rating"],
    "additionalProperties": False,
}

# Value handed to `beta.messages.create(output_format=...)` by the tests in
# this module; it wraps JOKE_SCHEMA under a named, strict "json_schema" entry.
# NOTE(review): the Anthropic sample app sends
# {"type": "json_schema", "schema": ...} instead — confirm that the API also
# accepts this "json" / "json_schema" shape before un-skipping the tests.
OUTPUT_FORMAT = {
    "type": "json",
    "json_schema": {"name": "joke_response", "strict": True, "schema": JOKE_SCHEMA},
}


@pytest.mark.skip(reason="Requires anthropic SDK >= 0.50.0 with structured outputs support")

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Apply skip decorator consistently across all structured output tests.

Only the first test has the skip decorator for SDK version >= 0.50.0, but all three tests use the same beta.messages.create API with output_format and betas=["structured-outputs-2025-11-13"]. If the SDK version requirement applies to the first test, it should apply to all three tests that exercise the same structured outputs feature.

Apply this diff to add the skip decorator to the remaining tests:

+@pytest.mark.skip(reason="Requires anthropic SDK >= 0.50.0 with structured outputs support")
 @pytest.mark.vcr
 def test_anthropic_structured_outputs_with_events_with_content(
     instrument_with_content, anthropic_client, span_exporter, log_exporter, reader
 ):
+@pytest.mark.skip(reason="Requires anthropic SDK >= 0.50.0 with structured outputs support")
 @pytest.mark.vcr
 def test_anthropic_structured_outputs_with_events_with_no_content(
     instrument_with_no_content, anthropic_client, span_exporter, log_exporter, reader
 ):

Also applies to: 100-100, 146-146

🤖 Prompt for AI Agents
In
packages/opentelemetry-instrumentation-anthropic/tests/test_structured_outputs.py
around lines 38, 100, and 146, the pytest.mark.skip decorator for "Requires
anthropic SDK >= 0.50.0 with structured outputs support" is only applied to the
first test; add the same @pytest.mark.skip(reason="Requires anthropic SDK >=
0.50.0 with structured outputs support") decorator immediately above the other
two test functions (lines ~100 and ~146) so all tests using beta.messages.create
with output_format and betas=["structured-outputs-2025-11-13"] are consistently
skipped when the SDK requirement is not met.

@pytest.mark.skip(reason="Requires anthropic SDK >= 0.50.0 with structured outputs support")
@pytest.mark.vcr
def test_anthropic_structured_outputs_legacy(
    instrument_legacy, anthropic_client, span_exporter, log_exporter, reader
):
    """Check the span recorded for a structured-output request in legacy mode.

    Issues a single ``beta.messages.create`` call with ``OUTPUT_FORMAT`` and
    asserts that the one exported span carries the prompt, the completion,
    the request schema and the model names, that the metrics verify, and
    that no log records were emitted.
    """
    model_name = "claude-sonnet-4-5-20250929"
    question = "Tell me a joke about OpenTelemetry and rate it from 1 to 10"

    response = anthropic_client.beta.messages.create(
        model=model_name,
        max_tokens=1024,
        betas=["structured-outputs-2025-11-13"],
        messages=[{"role": "user", "content": question}],
        output_format=OUTPUT_FORMAT,
    )

    # One API call must yield exactly one span.
    exported = span_exporter.get_finished_spans()
    assert len(exported) == 1
    assert exported[0].name == "anthropic.chat"

    span = exported[0]
    prompt_key = f"{GenAIAttributes.GEN_AI_PROMPT}.0"
    completion_key = f"{GenAIAttributes.GEN_AI_COMPLETION}.0"

    # Prompt and completion are mirrored onto the span as indexed attributes.
    assert span.attributes[f"{prompt_key}.content"] == question
    assert span.attributes[f"{prompt_key}.role"] == "user"
    assert span.attributes.get(f"{completion_key}.content") == response.content[0].text
    assert span.attributes.get(f"{completion_key}.role") == "assistant"

    # The requested schema is stored as a JSON string on the span.
    schema_key = SpanAttributes.LLM_REQUEST_STRUCTURED_OUTPUT_SCHEMA
    assert schema_key in span.attributes
    recorded_schema = json.loads(span.attributes[schema_key])
    assert "properties" in recorded_schema
    assert "joke" in recorded_schema["properties"]
    assert "rating" in recorded_schema["properties"]

    assert span.attributes.get(GenAIAttributes.GEN_AI_REQUEST_MODEL) == model_name
    assert span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_MODEL) == model_name

    # The reply itself must be JSON exposing both schema fields.
    payload = json.loads(response.content[0].text)
    assert "joke" in payload
    assert "rating" in payload

    verify_metrics(reader.get_metrics_data().resource_metrics, model_name)

    assert len(log_exporter.get_finished_logs()) == 0, (
        "Assert that it doesn't emit logs when use_legacy_attributes is True"
    )


@pytest.mark.skip(reason="Requires anthropic SDK >= 0.50.0 with structured outputs support")
@pytest.mark.vcr
def test_anthropic_structured_outputs_with_events_with_content(
    instrument_with_content, anthropic_client, span_exporter, log_exporter, reader
):
    """Check span, metrics and log count for a structured-output request.

    Runs under the ``instrument_with_content`` fixture: one span named
    ``anthropic.chat`` must carry the request schema and model names, the
    metrics must verify, and exactly two log records must be exported.
    """
    model_name = "claude-sonnet-4-5-20250929"
    question = "Tell me a joke about OpenTelemetry and rate it from 1 to 10"

    response = anthropic_client.beta.messages.create(
        model=model_name,
        max_tokens=1024,
        betas=["structured-outputs-2025-11-13"],
        messages=[{"role": "user", "content": question}],
        output_format=OUTPUT_FORMAT,
    )

    # One API call must yield exactly one span.
    exported = span_exporter.get_finished_spans()
    assert len(exported) == 1
    assert exported[0].name == "anthropic.chat"

    span = exported[0]

    # The requested schema is stored as a JSON string on the span.
    schema_key = SpanAttributes.LLM_REQUEST_STRUCTURED_OUTPUT_SCHEMA
    assert schema_key in span.attributes
    recorded_schema = json.loads(span.attributes[schema_key])
    assert "properties" in recorded_schema
    assert "joke" in recorded_schema["properties"]
    assert "rating" in recorded_schema["properties"]

    assert span.attributes.get(GenAIAttributes.GEN_AI_REQUEST_MODEL) == model_name
    assert span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_MODEL) == model_name

    # The reply itself must be JSON exposing both schema fields.
    payload = json.loads(response.content[0].text)
    assert "joke" in payload
    assert "rating" in payload

    verify_metrics(reader.get_metrics_data().resource_metrics, model_name)

    assert len(log_exporter.get_finished_logs()) == 2


@pytest.mark.skip(reason="Requires anthropic SDK >= 0.50.0 with structured outputs support")
@pytest.mark.vcr
def test_anthropic_structured_outputs_with_events_with_no_content(
    instrument_with_no_content, anthropic_client, span_exporter, log_exporter, reader
):
    """Check span, metrics and log count for a structured-output request.

    Runs under the ``instrument_with_no_content`` fixture: one span named
    ``anthropic.chat`` must still carry the request schema and model names,
    the metrics must verify, and exactly two log records must be exported.
    """
    model_name = "claude-sonnet-4-5-20250929"
    question = "Tell me a joke about OpenTelemetry and rate it from 1 to 10"

    response = anthropic_client.beta.messages.create(
        model=model_name,
        max_tokens=1024,
        betas=["structured-outputs-2025-11-13"],
        messages=[{"role": "user", "content": question}],
        output_format=OUTPUT_FORMAT,
    )

    # One API call must yield exactly one span.
    exported = span_exporter.get_finished_spans()
    assert len(exported) == 1
    assert exported[0].name == "anthropic.chat"

    span = exported[0]

    # The requested schema is stored as a JSON string on the span.
    schema_key = SpanAttributes.LLM_REQUEST_STRUCTURED_OUTPUT_SCHEMA
    assert schema_key in span.attributes
    recorded_schema = json.loads(span.attributes[schema_key])
    assert "properties" in recorded_schema
    assert "joke" in recorded_schema["properties"]
    assert "rating" in recorded_schema["properties"]

    assert span.attributes.get(GenAIAttributes.GEN_AI_REQUEST_MODEL) == model_name
    assert span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_MODEL) == model_name

    # The reply itself must be JSON exposing both schema fields.
    payload = json.loads(response.content[0].text)
    assert "joke" in payload
    assert "rating" in payload

    verify_metrics(reader.get_metrics_data().resource_metrics, model_name)

    assert len(log_exporter.get_finished_logs()) == 2
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,27 @@ def set_model_request_attributes(span, kwargs, llm_model):
span, SpanAttributes.LLM_FREQUENCY_PENALTY, kwargs.get("frequency_penalty")
)

generation_config = kwargs.get("generation_config")
if generation_config and hasattr(generation_config, "response_schema"):
try:
_set_span_attribute(
span,
SpanAttributes.LLM_REQUEST_STRUCTURED_OUTPUT_SCHEMA,
json.dumps(generation_config.response_schema),
)
except Exception:
pass

if "response_schema" in kwargs:
try:
_set_span_attribute(
span,
SpanAttributes.LLM_REQUEST_STRUCTURED_OUTPUT_SCHEMA,
json.dumps(kwargs.get("response_schema")),
)
except Exception:
pass


@dont_throw
def set_response_attributes(span, response, llm_model):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from anthropic import Anthropic
from traceloop.sdk import Traceloop
from dotenv import load_dotenv

# Read a local .env file into the process environment before anything else
# looks at it.
load_dotenv()

# Built with default arguments.
# NOTE(review): presumably the API key comes from the environment — confirm.
client = Anthropic()

# Start Traceloop under this demo's application name.
Traceloop.init(app_name="anthropic_structured_outputs_demo")


def main():
    """Request a schema-constrained joke from Claude and print the raw reply."""
    print("Making request with structured outputs...")

    # JSON Schema for the reply: a joke string plus an integer rating.
    schema = {
        "type": "object",
        "properties": {
            "joke": {"type": "string", "description": "A joke about OpenTelemetry"},
            "rating": {
                "type": "integer",
                "description": "Rating of the joke from 1 to 10",
            },
        },
        "required": ["joke", "rating"],
        "additionalProperties": False,
    }
    conversation = [
        {
            "role": "user",
            "content": "Tell me a joke about OpenTelemetry and rate it from 1 to 10",
        }
    ]

    response = client.beta.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=1024,
        betas=["structured-outputs-2025-11-13"],
        messages=conversation,
        output_format={"type": "json_schema", "schema": schema},
    )

    print("\n=== Response ===")
    print(response.content[0].text)
    print("\n=== The 'gen_ai.request.structured_output_schema' attribute should be logged ===")


if __name__ == "__main__":
    main()
49 changes: 49 additions & 0 deletions packages/sample-app/sample_app/gemini_structured_outputs_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import os
import google.generativeai as genai
from traceloop.sdk import Traceloop
from dotenv import load_dotenv

# Read a local .env file into the process environment before the key lookup.
load_dotenv()

# The key may be absent; `os.environ.get` then passes None to `configure`.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# Start Traceloop under this demo's application name.
Traceloop.init(app_name="gemini_structured_outputs_demo")


def main():
    """Request a schema-constrained joke from Gemini and print the reply text."""
    print("Making request with structured outputs...")

    # Schema for the reply: a joke string plus an integer rating.
    schema = {
        "type": "object",
        "properties": {
            "joke": {"type": "string", "description": "A joke about OpenTelemetry"},
            "rating": {
                "type": "integer",
                "description": "Rating of the joke from 1 to 10",
            },
        },
        "required": ["joke", "rating"],
    }

    gemini = genai.GenerativeModel("gemini-1.5-flash")

    # Ask for a JSON reply shaped by the schema above.
    config = genai.GenerationConfig(
        response_mime_type="application/json",
        response_schema=schema,
    )
    result = gemini.generate_content(
        "Tell me a joke about OpenTelemetry and rate it",
        generation_config=config,
    )

    print("\n=== Response ===")
    print(result.text)
    print("\n=== The 'gen_ai.request.structured_output_schema' attribute should be logged ===")


if __name__ == "__main__":
    main()
38 changes: 38 additions & 0 deletions packages/sample-app/sample_app/openai_structured_outputs_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os
from openai import OpenAI
from pydantic import BaseModel
from traceloop.sdk import Traceloop
from dotenv import load_dotenv

Comment thread
coderabbitai[bot] marked this conversation as resolved.
# Read a local .env file into the process environment before the key lookup.
load_dotenv()

# The key may be absent; `os.getenv` then passes None to the client.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Start Traceloop under this demo's application name.
Traceloop.init(app_name="structured_outputs_demo")


# Shape of the structured reply; `main` passes this class as `response_format`.
# Documented with comments rather than a docstring so that nothing extra can
# end up in the schema derived from the model.
class Joke(BaseModel):
    # Text of the joke.
    joke: str
    # Integer score given to the joke.
    rating: int


def main():
    """Request a joke parsed into ``Joke`` and print its fields.

    The SDK leaves ``message.parsed`` as ``None`` when the model refuses to
    answer, so the refusal text is printed in that case instead of failing
    with ``AttributeError`` on the missing attributes.
    """
    print("Making request with structured outputs...")
    response = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[{"role": "user", "content": "Tell me a joke about OpenTelemetry"}],
        response_format=Joke,
    )

    message = response.choices[0].message

    print("\n=== Response ===")
    if message.parsed is None:
        # No structured payload came back; show the refusal rather than crash.
        print(f"No structured output returned. Refusal: {message.refusal}")
    else:
        print(f"Joke: {message.parsed.joke}")
        print(f"Rating: {message.parsed.rating}")
    print(
        "\n=== Check the span output above for 'gen_ai.request.structured_output_schema' attribute ==="
    )


if __name__ == "__main__":
    main()
Loading