Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

community: fixes json loader not getting texts with json standard #27327

Merged
merged 3 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -186,12 +186,13 @@ def _get_text(self, sample: Any) -> str:
)

# In case the text is None, set it to an empty string
elif isinstance(content, str):
return content
elif isinstance(content, dict):
return json.dumps(content) if content else ""
else:
return str(content) if content is not None else ""
match content:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`match` statements were introduced in Python 3.10, but we support Python 3.9+. Please revert to an `if`/`elif` chain.

case list() | dict():
return json.dumps(content) if content else ""
case str():
return content
case _:
return str(content) if content is not None else ""

def _get_metadata(
self, sample: Dict[str, Any], **additional_fields: Any
Expand Down
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is fine, but in general I would recommend against changing both of these things together!

Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import io
from pathlib import Path
from typing import Any, Dict

import pytest
Expand All @@ -12,7 +13,7 @@


def test_load_valid_string_content(mocker: MockerFixture) -> None:
file_path = "/workspaces/langchain/test.json"
file_path = str(Path("/workspaces/langchain/test.json").resolve())
expected_docs = [
Document(
page_content="value1",
Expand All @@ -37,7 +38,7 @@ def test_load_valid_string_content(mocker: MockerFixture) -> None:


def test_load_valid_dict_content(mocker: MockerFixture) -> None:
file_path = "/workspaces/langchain/test.json"
file_path = str(Path("/workspaces/langchain/test.json").resolve())
expected_docs = [
Document(
page_content='{"text": "value1"}',
Expand All @@ -64,7 +65,7 @@ def test_load_valid_dict_content(mocker: MockerFixture) -> None:


def test_load_valid_bool_content(mocker: MockerFixture) -> None:
file_path = "/workspaces/langchain/test.json"
file_path = str(Path("/workspaces/langchain/test.json").resolve())
expected_docs = [
Document(
page_content="False",
Expand Down Expand Up @@ -93,7 +94,7 @@ def test_load_valid_bool_content(mocker: MockerFixture) -> None:


def test_load_valid_numeric_content(mocker: MockerFixture) -> None:
file_path = "/workspaces/langchain/test.json"
file_path = str(Path("/workspaces/langchain/test.json").resolve())
expected_docs = [
Document(
page_content="99",
Expand Down Expand Up @@ -122,7 +123,7 @@ def test_load_valid_numeric_content(mocker: MockerFixture) -> None:


def test_load_invalid_test_content(mocker: MockerFixture) -> None:
file_path = "/workspaces/langchain/test.json"
file_path = str(Path("/workspaces/langchain/test.json").resolve())

mocker.patch("builtins.open", mocker.mock_open())
mocker.patch(
Expand All @@ -139,7 +140,7 @@ def test_load_invalid_test_content(mocker: MockerFixture) -> None:


def test_load_jsonlines(mocker: MockerFixture) -> None:
file_path = "/workspaces/langchain/test.json"
file_path = str(Path("/workspaces/langchain/test.json").resolve())
expected_docs = [
Document(
page_content="value1",
Expand Down Expand Up @@ -177,7 +178,7 @@ def test_load_jsonlines(mocker: MockerFixture) -> None:
),
)
def test_load_jsonlines_list(params: Dict, mocker: MockerFixture) -> None:
file_path = "/workspaces/langchain/test.json"
file_path = str(Path("/workspaces/langchain/test.json").resolve())
expected_docs = [
Document(
page_content="value1",
Expand Down Expand Up @@ -250,7 +251,7 @@ def test_json_meta_01(
mocker.patch("builtins.open", mocker.mock_open())
mocker.patch(patch_func, return_value=patch_func_value)

file_path = "/workspaces/langchain/test.json"
file_path = str(Path("/workspaces/langchain/test.json").resolve())
expected_docs = [
Document(
page_content="value1",
Expand Down Expand Up @@ -300,7 +301,7 @@ def test_json_meta_02(
mocker.patch("builtins.open", mocker.mock_open())
mocker.patch(patch_func, return_value=patch_func_value)

file_path = "/workspaces/langchain/test.json"
file_path = str(Path("/workspaces/langchain/test.json").resolve())
expected_docs = [
Document(
page_content="value1",
Expand Down Expand Up @@ -336,7 +337,7 @@ def metadata_func(record: Dict, metadata: Dict) -> Dict:
def test_load_json_with_jq_parsable_content_key(
params: Dict, mocker: MockerFixture
) -> None:
file_path = "/workspaces/langchain/test.json"
file_path = str(Path("/workspaces/langchain/test.json").resolve())
expected_docs = [
Document(
page_content="value1",
Expand Down Expand Up @@ -364,7 +365,7 @@ def test_load_json_with_jq_parsable_content_key(


def test_load_json_with_nested_jq_parsable_content_key(mocker: MockerFixture) -> None:
file_path = "/workspaces/langchain/test.json"
file_path = str(Path("/workspaces/langchain/test.json").resolve())
expected_docs = [
Document(
page_content="message1",
Expand Down Expand Up @@ -401,7 +402,7 @@ def test_load_json_with_nested_jq_parsable_content_key(mocker: MockerFixture) ->
def test_load_json_with_nested_jq_parsable_content_key_with_metadata(
mocker: MockerFixture,
) -> None:
file_path = "/workspaces/langchain/test.json"
file_path = str(Path("/workspaces/langchain/test.json").resolve())
expected_docs = [
Document(
page_content="message1",
Expand Down
Loading