44 changes: 44 additions & 0 deletions docs/gen-ai/gen-ai-events.md
@@ -265,4 +265,48 @@ event with the corresponding operation when span id is not available.
<!-- END AUTOGENERATED TEXT -->
<!-- endsemconv -->

### Judgment Boundary Attributes

The judgment boundary attributes (`gen_ai.evaluation.judgment.*`) are intended primarily for event-level auditability and post-hoc inspection rather than for direct, high-cardinality metric aggregation.

These attributes capture decision points where a system explicitly evaluated multiple alternative outcomes before selecting a path. Common use cases include content safety checks, cost threshold evaluations, and policy enforcement boundaries.

#### Examples

**Example 1: Content Safety Pre-Execution Check**

```json
{
  "name": "gen_ai.evaluation.result",
  "attributes": {
    "gen_ai.evaluation.name": "ContentSafety",
    "gen_ai.evaluation.score.value": 0.85,
    "gen_ai.evaluation.score.label": "ALLOW",
    "gen_ai.evaluation.judgment.selected_path": "allow",
    "gen_ai.evaluation.judgment.alternatives_evaluated": true,
    "gen_ai.evaluation.judgment.phase": "pre_execution",
    "gen_ai.evaluation.judgment.human_in_loop": false,
    "gen_ai.evaluation.explanation": "Content safety score 0.85 exceeds threshold 0.7. Alternatives considered: 'allow', 'moderate', 'block'. Selected 'allow' based on score."
  }
}
```

**Example 2: Cost Boundary with Human Escalation**

```json
{
  "name": "gen_ai.evaluation.result",
  "attributes": {
    "gen_ai.evaluation.name": "CostBoundary",
    "gen_ai.evaluation.score.value": 1250.0,
    "gen_ai.evaluation.score.label": "HOLD",
    "gen_ai.evaluation.judgment.selected_path": "escalate",
    "gen_ai.evaluation.judgment.alternatives_evaluated": true,
    "gen_ai.evaluation.judgment.phase": "post_execution",
    "gen_ai.evaluation.judgment.human_in_loop": true,
    "gen_ai.evaluation.explanation": "Estimated cost $1250 exceeds automatic approval limit $1000. Alternatives 'block' and 'allow' evaluated; human approval requested."
  }
}
```
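
For instrumentation authors, the following is a minimal sketch of how these attributes might be recorded from application code. It uses the stable OpenTelemetry Python tracing API and attaches the evaluation as a span event purely to illustrate the attribute shapes; the conventions above define `gen_ai.evaluation.result` as a log-based event, and the tracer name, span name, score, and threshold below are illustrative assumptions rather than part of the conventions.

```python
# Minimal sketch with assumed values and names. A span event is used only
# for illustration; the semantic conventions define gen_ai.evaluation.result
# as a log-based event.
from opentelemetry import trace

tracer = trace.get_tracer("example.judgment.demo")  # illustrative tracer name

with tracer.start_as_current_span("content_safety_check") as span:
    score = 0.85      # evaluator output (assumed)
    threshold = 0.7   # policy threshold (assumed)
    selected = "allow" if score >= threshold else "block"

    # Record the judgment boundary: which path was chosen, when, and whether
    # alternatives and a human were involved in the decision.
    span.add_event(
        "gen_ai.evaluation.result",
        attributes={
            "gen_ai.evaluation.name": "ContentSafety",
            "gen_ai.evaluation.score.value": score,
            "gen_ai.evaluation.score.label": selected.upper(),
            "gen_ai.evaluation.judgment.selected_path": selected,
            "gen_ai.evaluation.judgment.alternatives_evaluated": True,
            "gen_ai.evaluation.judgment.phase": "pre_execution",
            "gen_ai.evaluation.judgment.human_in_loop": False,
        },
    )
```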

[DocumentStatus]: https://opentelemetry.io/docs/specs/otel/document-status
4 changes: 4 additions & 0 deletions model/gen-ai/events.yaml
@@ -44,3 +44,7 @@ groups:
          The `error.type` SHOULD match the error code returned by the Generative AI Evaluation provider or the client library,
          the canonical name of the exception that occurred, or another low-cardinality error identifier.
          Instrumentations SHOULD document the list of errors they report.
      - ref: gen_ai.evaluation.judgment.phase
      - ref: gen_ai.evaluation.judgment.selected_path
      - ref: gen_ai.evaluation.judgment.alternatives_evaluated
      - ref: gen_ai.evaluation.judgment.human_in_loop
30 changes: 30 additions & 0 deletions model/gen-ai/registry.yaml
@@ -591,6 +591,36 @@ groups:
        type: string
        brief: A free-form explanation for the assigned score provided by the evaluator.
        examples: ["The response is factually accurate but lacks sufficient detail to fully address the question."]
      - id: gen_ai.evaluation.judgment.phase
        stability: experimental
        type: string
        brief: Phase at which a judgment boundary evaluation occurred.
        note: >
          Indicates when the system evaluated alternative decision paths,
          such as before or after execution.
        examples: ["pre_execution", "post_execution"]
      - id: gen_ai.evaluation.judgment.selected_path
        stability: experimental
        type: string
        brief: The decision path selected after evaluating alternatives.
        note: >
          Records the outcome chosen by the system after explicitly
          evaluating multiple possible paths.
        examples: ["allow", "block", "escalate"]
      - id: gen_ai.evaluation.judgment.alternatives_evaluated
        stability: experimental
        type: boolean
        brief: Whether multiple alternative decision paths were evaluated.
        note: >
          Indicates that the system explicitly considered more than one
          possible outcome before selecting a path.
      - id: gen_ai.evaluation.judgment.human_in_loop
        stability: experimental
        type: boolean
        brief: Whether a human participated in the final decision.
        note: >
          Indicates that a human made or confirmed the final decision
          at the judgment boundary.
      - id: gen_ai.prompt.name
        type: string
        brief: The name of the prompt that uniquely identifies it.