Skip to content
198 changes: 186 additions & 12 deletions src/synthorg/api/openapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
This module provides :func:`inject_rfc9457_responses` which transforms
the Litestar-generated schema dict to:

1. Add the ``ProblemDetail`` schema (RFC 9457 bare response body)
2. Define reusable error responses with dual content types
3. Inject error response references into every operation
4. Replace Litestar's default 400 schema with the actual envelope
5. Append content negotiation docs to ``info.description``
1. Flatten nullable ``oneOf`` unions to JSON Schema 2020-12 ``type``
arrays (fixes the *"Expected union value"* warnings emitted by API doc renderers)
2. Add the ``ProblemDetail`` schema (RFC 9457 bare response body)
3. Define reusable error responses with dual content types
4. Inject error response references into every operation
5. Replace Litestar's default 400 schema with the actual envelope
6. Store content negotiation docs in ``info.x-documentation``
Comment thread
coderabbitai[bot] marked this conversation as resolved.

Called by ``scripts/export_openapi.py`` after schema generation.

Expand Down Expand Up @@ -174,6 +176,169 @@ class _ErrorResponseSpec(NamedTuple):
"""


# ── Nullable union normalization ──────────────────────────────
#
# The helpers below mutate ``result`` (a freshly constructed dict from
# the enclosing comprehension in ``_normalize_nullable_unions``) in
# place. They must not be called on the original input schema --
# ``inject_rfc9457_responses`` deep-copies it first.

_SCHEMAS_PREFIX: Final[str] = "#/components/schemas/"


def _flatten_nullable_ref(
    result: dict[str, Any],
    keyword: str,
    branch: dict[str, Any],
    all_schemas: dict[str, Any],
) -> bool:
    """Inline a nullable ``$ref`` to an enum schema.

    When the ``$ref`` target is a simple enum (has ``type`` and
    ``enum``), inlines the enum values and flattens to
    ``{type: [T, "null"], enum: [..., null]}``.

    Args:
        result: Schema dict that owns the union; mutated in place
            when the union is handled.
        keyword: Union keyword being replaced (``"oneOf"`` or
            ``"anyOf"``); removed from *result* on success.
        branch: The non-null branch of the union.
        all_schemas: ``components.schemas`` mapping used to resolve
            the ``$ref`` target.

    Returns:
        ``True`` if the union was handled, ``False`` otherwise (in
        which case *result* is left untouched).
    """
    ref = branch.get("$ref", "")
    if not isinstance(ref, str) or not ref.startswith(_SCHEMAS_PREFIX):
        return False

    target = all_schemas.get(ref.removeprefix(_SCHEMAS_PREFIX))
    if not isinstance(target, dict) or "enum" not in target or "type" not in target:
        return False

    # ``type`` may already be an array in JSON Schema 2020-12; extend it
    # instead of nesting it, and never list "null" twice.
    target_type = target["type"]
    type_names = list(target_type) if isinstance(target_type, list) else [target_type]
    if "null" not in type_names:
        type_names.append("null")

    # Copy the enum values so the referenced schema is not mutated, and
    # only add ``None`` when the enum does not already allow it.
    enum_values = list(target["enum"])
    if None not in enum_values:
        enum_values.append(None)

    # The target's title/description describe the enum type, not this
    # property, so they are dropped; any description already present on
    # *result* is therefore preserved by the update below.
    merged = {k: v for k, v in target.items() if k not in ("title", "description")}
    merged["type"] = type_names
    merged["enum"] = enum_values

    result.pop(keyword, None)
    result.update(merged)
    return True


def _flatten_nullable(
result: dict[str, Any],
keyword: str,
items: list[Any],
all_schemas: dict[str, Any] | None = None,
) -> None:
"""Flatten a nullable union (``T | None``) in *result* in place.

* Primitive branch (has ``type``): collapses to
``{type: [T, "null"], ...extras}``.
* ``$ref`` to enum: delegates to :func:`_flatten_nullable_ref`.
* Other ``$ref``: swaps ``oneOf`` to ``anyOf``.
"""
null_entries = [i for i in items if isinstance(i, dict) and i.get("type") == "null"]
if len(null_entries) != 1:
return

non_null = [i for i in items if i is not null_entries[0]]
if len(non_null) != 1:
return

branch = non_null[0]
if isinstance(branch, dict) and "type" in branch:
merged = {k: v for k, v in branch.items() if k != "type"}
merged["type"] = [branch["type"], "null"]
del result[keyword]
result.update(merged)
return

if (
isinstance(branch, dict)
and "$ref" in branch
and all_schemas
and _flatten_nullable_ref(result, keyword, branch, all_schemas)
):
return

if keyword == "oneOf":
result["anyOf"] = result.pop("oneOf")


_EXPECTED_UNION_BRANCHES: Final[int] = 2


def _collapse_redundant_union(
    result: dict[str, Any],
    keyword: str,
    items: list[Any],
) -> None:
    """Collapse a two-branch ``oneOf``/``anyOf`` that has an empty schema.

    Litestar emits ``oneOf: [{$ref: ...}, {}]`` for tuple item
    schemas. When exactly one of the two branches is the empty schema
    ``{}``, the union keyword is removed from *result* and the other
    branch's keys are merged into *result* in place.

    NOTE(review): under strict JSON Schema semantics ``anyOf: [S, {}]``
    accepts every instance and ``oneOf: [S, {}]`` accepts only what
    ``S`` rejects, so this rewrite is not validation-equivalent. It
    appears to be intended for documentation output only -- confirm
    before relying on the result for validation.

    Args:
        result: Schema dict that owns the union; mutated in place.
        keyword: ``"oneOf"`` or ``"anyOf"``.
        items: The union's branch list (``result[keyword]``).
    """
    if len(items) != _EXPECTED_UNION_BRANCHES:
        return
    empty_entries = [i for i in items if isinstance(i, dict) and not i]
    if len(empty_entries) != 1:
        return

    # Exactly two branches and exactly one empty dict, so exactly one
    # other branch remains after the identity filter.
    concrete = next(i for i in items if i is not empty_entries[0])
    if not isinstance(concrete, dict):
        # Boolean schemas (``True``/``False``) or other non-dict values
        # cannot be merged into *result*; leave the union untouched.
        return

    del result[keyword]
    result.update(concrete)

Comment on lines +268 to +289

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🌐 Web query:

In JSON Schema 2020-12, what are the validation semantics of anyOf: [<schema>, {}] and oneOf: [<schema>, {}]? Is either of them equivalent to just <schema>?

💡 Result:

In JSON Schema 2020-12, the empty schema object {} is an empty schema and therefore always passes validation (“Always passes validation, as if the empty schema {}”). [1]

anyOf: [ <schema>, {} ]

anyOf is valid if the instance validates against at least one subschema. [2]
Since {} always validates, anyOf: [<schema>, {}] always validates every instance (regardless of whether <schema> validates). Therefore it is not equivalent to <schema> (unless <schema> itself also accepts every instance). [1][2]

oneOf: [ <schema>, {} ]

oneOf is valid if the instance validates against exactly one subschema. [3]
Since {} always validates, oneOf: [<schema>, {}] validates iff <schema> does not validate (because otherwise both branches validate and “exactly one” fails). So it behaves like not: <schema>, not like <schema>. [1][3]

So: neither is equivalent to just <schema> (except in degenerate cases such as <schema> being always-true / always-false). [1][2][3]

Sources:
[1] JSON Schema Core 2020-12 — “empty schema” / boolean schemas (true behaves like {}) https://json-schema.org/draft/2020-12/json-schema-core
[2] JSON Schema Core 2020-12 — anyOf semantics https://json-schema.org/draft/2020-12/json-schema-core
[3] JSON Schema Core 2020-12 — oneOf semantics https://json-schema.org/draft/2020-12/json-schema-core


Do not collapse oneOf/anyOf with {} into the concrete branch.

Per JSON Schema 2020-12, the empty schema {} always validates. This makes anyOf: [<schema>, {}] always valid (not equivalent to <schema>), and oneOf: [<schema>, {}] validate only when <schema> does not (equivalent to not: <schema>). Collapsing either to just the concrete branch using result.update(concrete[0]) changes the validation semantics. If Litestar's tuple representation needs rewriting, detect and rewrite that specific container structure instead of applying this generic rule.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/synthorg/api/openapi.py` around lines 268 - 288, The current
_collapse_redundant_union function incorrectly replaces any oneOf/anyOf
containing {} with the concrete branch (using result.update(concrete[0])), which
changes JSON Schema semantics; instead, stop the generic collapse and only
rewrite when you can confidently detect Litestar's tuple encoding pattern
(inspect keyword, items, and use _EXPECTED_UNION_BRANCHES to ensure shape) — for
example, match the specific container structure Litestar emits for tuples and
transform that structure only, otherwise return without modifying result or
deleting result[keyword]; preserve behavior for unknown schemas to avoid
changing validation semantics.


def _normalize_nullable_unions(
obj: Any,
all_schemas: dict[str, Any] | None = None,
) -> Any:
"""Flatten nullable union schemas to idiomatic JSON Schema 2020-12.

Litestar wraps ``T | None`` fields in ``oneOf``, producing
``oneOf: [{type: "string"}, {type: "null"}]``. API doc renderers
expects the compact ``type: ["string", "null"]`` form for
primitives, and ``anyOf`` for ``$ref``-based nullables.
Comment thread
coderabbitai[bot] marked this conversation as resolved.

Args:
obj: Any JSON-serialisable value (typically the full OpenAPI
schema dict).
all_schemas: ``components.schemas`` dict used to resolve
``$ref`` targets for enum inlining. When ``None``,
``$ref``-based nullable unions are converted to ``anyOf``
(enums cannot be inlined without schema resolution).

Conversion rules (applied to both ``oneOf`` and ``anyOf``):

* **Primitive nullable** -- non-null branch has a ``type`` key:
merge into ``{type: [T, "null"], ...extras}``.
* **Enum $ref nullable** -- non-null branch is a ``$ref`` to a
simple enum: inline the enum values and flatten.
* **Object $ref nullable** -- non-null branch is a ``$ref`` to
a complex schema: convert to ``anyOf`` (known renderer
bug -- see linked issue for details).
* **Redundant union** -- one branch is an empty schema ``{}``:
collapse to just the non-empty branch (Litestar emits this
for ``tuple[T, ...]`` item schemas).
* **Discriminated unions** -- no ``{"type": "null"}`` entry and
no empty-schema branch: left unchanged.
"""
if isinstance(obj, dict):
result = {k: _normalize_nullable_unions(v, all_schemas) for k, v in obj.items()}

for keyword in ("oneOf", "anyOf"):
if keyword not in result or not isinstance(result[keyword], list):
continue
_flatten_nullable(result, keyword, result[keyword], all_schemas)
if keyword in result:
# Re-fetch: _flatten_nullable may have replaced the list.
_collapse_redundant_union(result, keyword, result[keyword])

return result

if isinstance(obj, list):
return [_normalize_nullable_unions(item, all_schemas) for item in obj]
return obj
Comment on lines +291 to +340

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🌐 Web query:

In OpenAPI 3.1, which parts of the document are Schema Objects, and are example/examples values arbitrary instance data rather than schema definitions?

💡 Result:

Where Schema Objects appear in an OpenAPI 3.1 document

A Schema Object (i.e., an OpenAPI-flavored wrapper around JSON Schema 2020-12) is used anywhere you see a schema: field, most notably:

Are example / examples arbitrary instance data (not schemas)?

Yes. In OpenAPI 3.1:

  • example (where allowed) is “Any” and is an example instance value expected to match/align with the associated schema. (spec.openapis.org)
  • examples in Parameters / Media Types is a map of Example Objects, whose value is an embedded literal example (instance data), not a schema. (spec.openapis.org)
  • Within the Schema Object itself, examples (JSON Schema’s keyword) is preferred; OpenAPI’s Schema Object example is retained mainly for compatibility and is described as a free-form example of an instance. (github.com)

Citations:


🏁 Script executed:

# First, let's check the file size and read the relevant section
wc -l src/synthorg/api/openapi.py

Repository: Aureliolo/synthorg

Length of output: 93


🏁 Script executed:

# Read the function in question (lines 290-339)
sed -n '290,339p' src/synthorg/api/openapi.py

Repository: Aureliolo/synthorg

Length of output: 2315


🏁 Script executed:

# Also check lines 656-658 mentioned in "Also applies to"
sed -n '650,665p' src/synthorg/api/openapi.py

Repository: Aureliolo/synthorg

Length of output: 603


🏁 Script executed:

# Find where inject_rfc9457_responses() calls this function
rg -n "inject_rfc9457_responses|_normalize_nullable_unions" src/synthorg/api/openapi.py

Repository: Aureliolo/synthorg

Length of output: 624


Restrict schema normalization to Schema Object locations only; do not process the entire OpenAPI document.

_normalize_nullable_unions() recursively processes every dict/list in the input, and inject_rfc9457_responses() (line 658) feeds it the entire OpenAPI document. This means example/examples payloads or extension values that happen to contain oneOf/anyOf will be rewritten as if they were schema definitions, silently corrupting user-supplied instance data. Per OpenAPI 3.1, example/examples are arbitrary instance data, not schema definitions, and should never be transformed.

Instead of walking the full document, restrict normalization to known Schema Object locations:

  • components.schemas
  • Parameter/header schema fields
  • Request/response body media-type schema fields
  • Callback/webhook body schemas

This prevents accidental corruption of example data while still covering all actual schema definitions.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/synthorg/api/openapi.py` around lines 290 - 339, The current
_normalize_nullable_unions walks the entire OpenAPI dict and mutates any
dict/list containing oneOf/anyOf (including examples), so change the flow to
only normalize actual Schema Object locations: call _normalize_nullable_unions
only on components.schemas entries and on every Schema Object found at
parameter/header.schema, requestBody.content.*.schema,
response.content.*.schema, and callback/webhook body schema locations (and not
on full document or example/extension values). Implement a small walker in
inject_rfc9457_responses (or a new helper) that iterates these known locations
and invokes _normalize_nullable_unions (which can remain schema-focused and call
_flatten_nullable/_collapse_redundant_union), leaving all other parts of the
OpenAPI dict untouched so examples and extensions are not modified.



# ── Helpers ───────────────────────────────────────────────────


Expand Down Expand Up @@ -437,11 +602,14 @@ def _build_all_responses(


def _update_info_description(info: dict[str, Any]) -> None:
    """Store RFC 9457 documentation in an extension field.

    Uses ``x-documentation`` so the content is preserved in the
    spec but not rendered inline by API doc renderers (which display
    ``info.description`` prominently at the top of the page).

    ``info.description`` itself is left untouched. The call is
    idempotent: ``setdefault`` never overwrites an existing
    ``x-documentation`` mapping or an existing ``rfc9457`` entry.

    Args:
        info: The OpenAPI ``info`` object; mutated in place.
    """
    x_doc: dict[str, Any] = info.setdefault("x-documentation", {})
    x_doc.setdefault("rfc9457", _RFC9457_DESCRIPTION_SECTION)

Comment thread
coderabbitai[bot] marked this conversation as resolved.

# ── Main function ─────────────────────────────────────────────
Expand All @@ -453,19 +621,21 @@ def inject_rfc9457_responses(schema: dict[str, Any]) -> dict[str, Any]:
Takes the raw schema dict produced by Litestar's
``app.openapi_schema.to_schema()`` and returns a **new** dict with:

- Nullable ``oneOf`` unions flattened to JSON Schema 2020-12
``type`` arrays (fixes API doc renderers' validation warnings)
- ``ProblemDetail`` added to ``components.schemas``
- Reusable error responses (dual content types) in
``components.responses``
- Error response refs injected into every operation
- RFC 9457 docs appended to ``info.description``
- RFC 9457 docs stored in ``info.x-documentation``

Args:
schema: OpenAPI schema dict (not modified).

Returns:
Enhanced copy of the schema.
"""
result = copy.deepcopy(schema)
result: dict[str, Any] = copy.deepcopy(schema)

components = result.setdefault("components", {})
schemas = components.setdefault("schemas", {})
Expand All @@ -480,6 +650,10 @@ def inject_rfc9457_responses(schema: dict[str, Any]) -> dict[str, Any]:
)
_update_info_description(result.setdefault("info", {}))

# Normalize after all schemas are in place (including ProblemDetail).
# Workaround for a renderer bug -- see issue #268 for details
result = _normalize_nullable_unions(result, all_schemas=schemas)
Comment thread
coderabbitai[bot] marked this conversation as resolved.

path_count = len(result.get("paths", {}))
logger.debug(
API_OPENAPI_SCHEMA_ENHANCED,
Expand Down
44 changes: 42 additions & 2 deletions tests/integration/api/test_openapi_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,45 @@ def test_full_app_schema_enhancement() -> None:
health = result["paths"]["/api/v1/health"]["get"]["responses"]
assert "401" not in health

# Info description updated.
assert "RFC 9457" in result["info"]["description"]
# RFC 9457 docs in x-documentation, not info.description.
assert "RFC 9457" not in result["info"].get("description", "")
assert "rfc9457" in result["info"]["x-documentation"]


def _find_oneof_with_null(
    obj: Any,
    path: str = "$",
) -> list[str]:
    """Collect the paths of every ``oneOf`` list that has a null branch.

    Walks dicts and lists recursively. A dict is reported (by its own
    path) when its ``oneOf`` value is a list containing at least one
    dict whose ``type`` is ``"null"``. Paths use ``.key`` for dict
    members and ``[index]`` for list elements, starting from *path*.

    Args:
        obj: Value to inspect (typically a full OpenAPI schema dict).
        path: Path prefix of *obj*; ``"$"`` denotes the root.

    Returns:
        Paths of offending schemas, in traversal order.
    """
    if isinstance(obj, list):
        found: list[str] = []
        for index, element in enumerate(obj):
            found.extend(_find_oneof_with_null(element, f"{path}[{index}]"))
        return found

    if not isinstance(obj, dict):
        return []

    found = []
    branches = obj.get("oneOf")
    if isinstance(branches, list) and any(
        isinstance(entry, dict) and entry.get("type") == "null"
        for entry in branches
    ):
        # Report each schema once, however many null branches it has.
        found.append(path)

    for key, value in obj.items():
        found.extend(_find_oneof_with_null(value, f"{path}.{key}"))
    return found


@pytest.mark.integration
def test_no_oneof_with_null_after_processing() -> None:
    """Assert the enhanced schema has no ``oneOf`` with a null branch.

    Builds the real application schema, runs it through
    ``inject_rfc9457_responses`` and scans the result, so that a new
    model with optional fields cannot reintroduce the pattern
    unnoticed.
    """
    # Imported lazily so the application is only loaded when this
    # test actually runs.
    from synthorg.api.app import create_app

    raw_schema: dict[str, Any] = create_app().openapi_schema.to_schema()
    violations = _find_oneof_with_null(inject_rfc9457_responses(raw_schema))

    assert violations == [], (
        f"oneOf-with-null found after post-processing: {violations}"
    )
30 changes: 15 additions & 15 deletions tests/unit/api/test_openapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,32 +585,32 @@ def test_unknown_key_returns_false(self) -> None:

@pytest.mark.unit
class TestInfoDescription:
    """RFC 9457 documentation is stored in x-documentation extension."""

    def test_rfc9457_in_x_documentation(self, base_schema: dict[str, Any]) -> None:
        """The ``rfc9457`` entry exists and mentions RFC 9457."""
        result = inject_rfc9457_responses(base_schema)
        xdoc = result["info"]["x-documentation"]
        assert "rfc9457" in xdoc
        assert "RFC 9457" in xdoc["rfc9457"]

    def test_not_in_description(self, base_schema: dict[str, Any]) -> None:
        """RFC 9457 docs should not pollute info.description."""
        result = inject_rfc9457_responses(base_schema)
        desc = result["info"].get("description", "")
        assert "RFC 9457" not in desc

    def test_mentions_content_negotiation(self, base_schema: dict[str, Any]) -> None:
        """Both negotiated media types are named in the stored docs."""
        result = inject_rfc9457_responses(base_schema)
        rfc_doc = result["info"]["x-documentation"]["rfc9457"]
        assert "application/problem+json" in rfc_doc
        assert "application/json" in rfc_doc

    def test_preserves_existing_description(self) -> None:
        """Existing info.description is not modified."""
        schema = _minimal_schema()
        schema["info"]["description"] = "My custom API description."
        result = inject_rfc9457_responses(schema)
        assert result["info"]["description"] == "My custom API description."


# ── Idempotency and immutability ──────────────────────────────
Expand Down
Loading
Loading