Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import secrets
import string
import uuid
import warnings
from collections.abc import AsyncGenerator
from datetime import UTC, datetime

Expand Down Expand Up @@ -911,8 +912,16 @@ async def load_data_from_url(url: str) -> str:


async def get_raw_document_text(document: Document) -> str:
if not document.mime_type.startswith("text/"):
# Handle deprecated text/yaml mime type with warning
if document.mime_type == "text/yaml":
warnings.warn(
"The 'text/yaml' MIME type is deprecated. Please use 'application/yaml' instead.",
DeprecationWarning,
stacklevel=2,
)
elif not (document.mime_type.startswith("text/") or document.mime_type == "application/yaml"):
raise ValueError(f"Unexpected document mime type: {document.mime_type}")

if isinstance(document.content, URL):
return await load_data_from_url(document.content.uri)
elif isinstance(document.content, str):
Expand Down
187 changes: 187 additions & 0 deletions tests/unit/providers/agent/test_get_raw_document_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import warnings
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from llama_stack.apis.agents import Document
from llama_stack.apis.common.content_types import URL, TextContentItem
from llama_stack.providers.inline.agents.meta_reference.agent_instance import get_raw_document_text


@pytest.mark.asyncio
async def test_get_raw_document_text_supports_text_mime_types():
"""Test that the function accepts text/* mime types."""
document = Document(content="Sample text content", mime_type="text/plain")

result = await get_raw_document_text(document)
assert result == "Sample text content"


@pytest.mark.asyncio
async def test_get_raw_document_text_supports_yaml_mime_type():
"""Test that the function accepts application/yaml mime type."""
yaml_content = """
name: test
version: 1.0
items:
- item1
- item2
"""

document = Document(content=yaml_content, mime_type="application/yaml")

result = await get_raw_document_text(document)
assert result == yaml_content


@pytest.mark.asyncio
async def test_get_raw_document_text_supports_deprecated_text_yaml_with_warning():
"""Test that the function accepts text/yaml but emits a deprecation warning."""
yaml_content = """
name: test
version: 1.0
items:
- item1
- item2
"""

document = Document(content=yaml_content, mime_type="text/yaml")

with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
result = await get_raw_document_text(document)

# Check that result is correct
assert result == yaml_content

# Check that exactly one warning was issued
assert len(w) == 1
assert issubclass(w[0].category, DeprecationWarning)
assert "text/yaml" in str(w[0].message)
assert "application/yaml" in str(w[0].message)
assert "deprecated" in str(w[0].message).lower()


@pytest.mark.asyncio
async def test_get_raw_document_text_deprecated_text_yaml_with_url():
"""Test that text/yaml works with URL content and emits warning."""
yaml_content = "name: test\nversion: 1.0"

with patch("llama_stack.providers.inline.agents.meta_reference.agent_instance.load_data_from_url") as mock_load:
mock_load.return_value = yaml_content

document = Document(content=URL(uri="https://example.com/config.yaml"), mime_type="text/yaml")

with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
result = await get_raw_document_text(document)

# Check that result is correct
assert result == yaml_content
mock_load.assert_called_once_with("https://example.com/config.yaml")

# Check that deprecation warning was issued
assert len(w) == 1
assert issubclass(w[0].category, DeprecationWarning)
assert "text/yaml" in str(w[0].message)


@pytest.mark.asyncio
async def test_get_raw_document_text_deprecated_text_yaml_with_text_content_item():
"""Test that text/yaml works with TextContentItem and emits warning."""
yaml_content = "key: value\nlist:\n - item1\n - item2"

document = Document(content=TextContentItem(text=yaml_content), mime_type="text/yaml")

with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
result = await get_raw_document_text(document)

# Check that result is correct
assert result == yaml_content

# Check that deprecation warning was issued
assert len(w) == 1
assert issubclass(w[0].category, DeprecationWarning)
assert "text/yaml" in str(w[0].message)


@pytest.mark.asyncio
async def test_get_raw_document_text_rejects_unsupported_mime_types():
"""Test that the function rejects unsupported mime types."""
document = Document(
content="Some content",
mime_type="application/json", # Not supported
)

with pytest.raises(ValueError, match="Unexpected document mime type: application/json"):
await get_raw_document_text(document)


@pytest.mark.asyncio
async def test_get_raw_document_text_with_url_content():
"""Test that the function handles URL content correctly."""
mock_response = AsyncMock()
mock_response.text = "Content from URL"

with patch("llama_stack.providers.inline.agents.meta_reference.agent_instance.load_data_from_url") as mock_load:
mock_load.return_value = "Content from URL"

document = Document(content=URL(uri="https://example.com/test.txt"), mime_type="text/plain")

result = await get_raw_document_text(document)
assert result == "Content from URL"
mock_load.assert_called_once_with("https://example.com/test.txt")


@pytest.mark.asyncio
async def test_get_raw_document_text_with_yaml_url():
"""Test that the function handles YAML URLs correctly."""
yaml_content = "name: test\nversion: 1.0"

with patch("llama_stack.providers.inline.agents.meta_reference.agent_instance.load_data_from_url") as mock_load:
mock_load.return_value = yaml_content

document = Document(content=URL(uri="https://example.com/config.yaml"), mime_type="application/yaml")

result = await get_raw_document_text(document)
assert result == yaml_content
mock_load.assert_called_once_with("https://example.com/config.yaml")


@pytest.mark.asyncio
async def test_get_raw_document_text_with_text_content_item():
"""Test that the function handles TextContentItem correctly."""
document = Document(content=TextContentItem(text="Text content item"), mime_type="text/plain")

result = await get_raw_document_text(document)
assert result == "Text content item"


@pytest.mark.asyncio
async def test_get_raw_document_text_with_yaml_text_content_item():
"""Test that the function handles YAML TextContentItem correctly."""
yaml_content = "key: value\nlist:\n - item1\n - item2"

document = Document(content=TextContentItem(text=yaml_content), mime_type="application/yaml")

result = await get_raw_document_text(document)
assert result == yaml_content


@pytest.mark.asyncio
async def test_get_raw_document_text_rejects_unexpected_content_type():
"""Test that the function rejects unexpected document content types."""
# Create a mock document that bypasses Pydantic validation
mock_document = MagicMock(spec=Document)
mock_document.mime_type = "text/plain"
mock_document.content = 123 # Unexpected content type (not str, URL, or TextContentItem)

with pytest.raises(ValueError, match="Unexpected document content type: <class 'int'>"):
await get_raw_document_text(mock_document)
Loading