diff --git a/poetry.lock b/poetry.lock index 752e83b8c218..08fe4b3dce38 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2616,8 +2616,8 @@ files = [ [package.dependencies] cffi = {version = ">=1.12.2", markers = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""} greenlet = [ - {version = ">=2.0.0", markers = "platform_python_implementation == \"CPython\" and python_version < \"3.11\""}, {version = ">=3.0rc3", markers = "platform_python_implementation == \"CPython\" and python_version >= \"3.11\""}, + {version = ">=2.0.0", markers = "platform_python_implementation == \"CPython\" and python_version < \"3.11\""}, ] "zope.event" = "*" "zope.interface" = "*" @@ -2776,12 +2776,12 @@ files = [ google-auth = ">=2.14.1,<3.0.dev0" googleapis-common-protos = ">=1.56.2,<2.0.dev0" grpcio = [ - {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, + {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, ] grpcio-status = [ - {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, + {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, ] proto-plus = ">=1.22.3,<2.0.0dev" protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" @@ -4241,6 +4241,17 @@ files = [ {file = "jq-1.7.0.tar.gz", hash = "sha256:f460d1f2c3791617e4fb339fa24efbdbebe672b02c861f057358553642047040"}, ] +[[package]] +name = "json-repair" +version = "0.25.2" +description = "A package to repair broken json strings" 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "json_repair-0.25.2-py3-none-any.whl", hash = "sha256:51d67295c3184b6c41a3572689661c6128cef6cfc9fb04db63130709adfc5bf0"}, + {file = "json_repair-0.25.2.tar.gz", hash = "sha256:161a56d7e6bbfd4cad3a614087e3e0dbd0e10d402dd20dc7db418432428cb32b"}, +] + [[package]] name = "jsonpatch" version = "1.33" @@ -4979,8 +4990,8 @@ psutil = ">=5.9.1" pywin32 = {version = "*", markers = "platform_system == \"Windows\""} pyzmq = ">=25.0.0" requests = [ - {version = ">=2.26.0", markers = "python_version <= \"3.11\""}, {version = ">=2.32.2", markers = "python_version > \"3.11\""}, + {version = ">=2.26.0", markers = "python_version <= \"3.11\""}, ] tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.11\""} @@ -6518,9 +6529,9 @@ files = [ [package.dependencies] numpy = [ + {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""}, {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""}, {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""}, - {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -11315,4 +11326,4 @@ local = ["ctransformers", "llama-cpp-python", "sentence-transformers"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "0dcc235615bcef9db6b669b27a2588f58121bf6233545a469b08206cb5a6e63b" +content-hash = "e4d237d2f128824e707fb5e2c0ba645495fc0947ba868099aac39a3e4424ac5d" diff --git a/pyproject.toml b/pyproject.toml index fd7abecf7040..b0a56ece3a37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,10 +94,14 @@ langchain-aws = "^0.1.6" langchain-mongodb = "^0.1.6" kubernetes = "^30.1.0" firecrawl-py = "^0.0.16" + +json-repair = "^0.25.2" + langchain-nvidia-ai-endpoints = "^0.1.2" langchain-google-calendar-tools = "^0.0.1" + [tool.poetry.group.dev.dependencies] types-redis = "^4.6.0.5" ipykernel 
import json
import re
import unicodedata
from langflow.custom import Component
from langflow.inputs import MessageTextInput, BoolInput
from langflow.template import Output
from langflow.schema.message import Message


class JSONCleaner(Component):
    """Extract the outermost ``{...}`` span from a string and repair it as JSON.

    The span is optionally stripped of control characters, Unicode-normalized
    and validated before being handed to ``json_repair.repair_json``.
    """

    display_name = "JSON Cleaner"
    description = "Cleans the messy and sometimes incorrect JSON strings produced by LLMs so that they are fully compliant with the JSON spec."
    icon = "custom_components"

    inputs = [
        MessageTextInput(
            name="json_str", display_name="JSON String", info="The JSON string to be cleaned.", required=True
        ),
        BoolInput(
            name="remove_control_chars",
            display_name="Remove Control Characters",
            info="Remove control characters from the JSON string.",
            required=False,
        ),
        BoolInput(
            name="normalize_unicode",
            display_name="Normalize Unicode",
            info="Normalize Unicode characters in the JSON string.",
            required=False,
        ),
        BoolInput(
            name="validate_json",
            display_name="Validate JSON",
            info="Validate the JSON string to ensure it is well-formed.",
            required=False,
        ),
    ]

    outputs = [
        Output(display_name="Cleaned JSON String", name="output", method="clean_json"),
    ]

    def clean_json(self) -> Message:
        """Clean the input JSON string based on provided options and return the cleaned JSON string.

        The text between the first ``{`` and the last ``}`` of ``self.json_str``
        is kept, the enabled pre-processing steps are applied in order
        (control-character removal, Unicode normalization, validation), and
        the result is passed through ``repair_json``.

        Returns:
            A ``Message`` whose ``text`` is the repaired string; the same
            string is also stored in ``self.status``.

        Raises:
            ImportError: If the ``json_repair`` package is not installed.
            ValueError: If no ``{...}`` span is found, or if any cleaning
                step fails (the underlying error is chained as the cause).
        """
        try:
            from json_repair import repair_json  # type: ignore
        except ImportError as e:
            # The two literals are concatenated; the trailing space keeps the
            # sentences apart in the final message.
            raise ImportError(
                "Could not import the json_repair package. "
                "Please install it with `pip install json_repair`."
            ) from e

        json_str = self.json_str
        remove_control_chars = self.remove_control_chars
        normalize_unicode = self.normalize_unicode
        validate_json = self.validate_json

        try:
            start = json_str.find("{")
            end = json_str.rfind("}")
            # `end < start` also covers `end == -1` and rejects input whose
            # last '}' comes before its first '{', which would otherwise
            # produce an empty slice.
            if start == -1 or end < start:
                raise ValueError("Invalid JSON string: Missing '{' or '}'")
            json_str = json_str[start : end + 1]

            if remove_control_chars:
                json_str = self._remove_control_characters(json_str)
            if normalize_unicode:
                json_str = self._normalize_unicode(json_str)
            # NOTE: validation runs before the repair step, so with
            # `validate_json` enabled an input that is not already valid JSON
            # raises instead of being repaired.
            if validate_json:
                json_str = self._validate_json(json_str)

            result = str(repair_json(json_str))

            self.status = result
            return Message(text=result)
        except Exception as e:
            # Every failure is surfaced as ValueError; the original exception
            # stays reachable through __cause__.
            raise ValueError(f"Error cleaning JSON string: {e}") from e

    def _remove_control_characters(self, s: str) -> str:
        """Return ``s`` without ASCII control characters (U+0000-U+001F and U+007F).

        This range includes tab, newline and carriage return.
        """
        return re.sub(r"[\x00-\x1F\x7F]", "", s)

    def _normalize_unicode(self, s: str) -> str:
        """Return ``s`` normalized to Unicode NFC form."""
        return unicodedata.normalize("NFC", s)

    def _validate_json(self, s: str) -> str:
        """Return ``s`` unchanged if it parses as JSON.

        Raises:
            ValueError: If ``json.loads`` rejects ``s``; the decode error is
                chained as the cause.
        """
        try:
            json.loads(s)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON string: {e}") from e
        return s