Skip to content

Commit

Permalink
feat: Add JSON Cleaner Component (langflow-ai#2584)
Browse files Browse the repository at this point in the history
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Gabriel Luiz Freitas Almeida <[email protected]>
  • Loading branch information
3 people authored Jul 12, 2024
1 parent 8813143 commit c522a7a
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 6 deletions.
23 changes: 17 additions & 6 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,14 @@ langchain-aws = "^0.1.6"
langchain-mongodb = "^0.1.6"
kubernetes = "^30.1.0"
firecrawl-py = "^0.0.16"

json-repair = "^0.25.2"

langchain-nvidia-ai-endpoints = "^0.1.2"
langchain-google-calendar-tools = "^0.0.1"



[tool.poetry.group.dev.dependencies]
types-redis = "^4.6.0.5"
ipykernel = "^6.29.0"
Expand Down
93 changes: 93 additions & 0 deletions src/backend/base/langflow/components/prototypes/JSONCleaner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import json
import re
import unicodedata
from langflow.custom import Component
from langflow.inputs import MessageTextInput, BoolInput
from langflow.template import Output
from langflow.schema.message import Message


class JSONCleaner(Component):
    """Component that extracts and repairs a JSON object embedded in text.

    The text between the first ``{`` and the last ``}`` of the input is
    optionally stripped of control characters, Unicode-normalized and
    strictly validated, and is then handed to ``json_repair.repair_json``.
    """

    display_name = "JSON Cleaner"
    description = "Cleans the messy and sometimes incorrect JSON strings produced by LLMs so that they are fully compliant with the JSON spec."
    icon = "custom_components"

    inputs = [
        MessageTextInput(
            name="json_str", display_name="JSON String", info="The JSON string to be cleaned.", required=True
        ),
        BoolInput(
            name="remove_control_chars",
            display_name="Remove Control Characters",
            info="Remove control characters from the JSON string.",
            required=False,
        ),
        BoolInput(
            name="normalize_unicode",
            display_name="Normalize Unicode",
            info="Normalize Unicode characters in the JSON string.",
            required=False,
        ),
        BoolInput(
            name="validate_json",
            display_name="Validate JSON",
            info="Validate the JSON string to ensure it is well-formed.",
            required=False,
        ),
    ]

    outputs = [
        Output(display_name="Cleaned JSON String", name="output", method="clean_json"),
    ]

    def clean_json(self) -> Message:
        """Clean the input JSON string based on provided options and return the cleaned JSON string.

        Reads ``self.json_str`` plus the ``remove_control_chars``,
        ``normalize_unicode`` and ``validate_json`` flags, stores the result
        in ``self.status`` and returns it wrapped in a ``Message``.

        Raises:
            ImportError: If the ``json_repair`` package is not installed.
            ValueError: If no ``{...}`` span can be found in the input, if
                validation is enabled and fails, or if any other error
                occurs while cleaning.
        """
        # Imported lazily so the module still loads when the optional
        # dependency is missing; the failure surfaces only when the
        # component is actually run.
        try:
            from json_repair import repair_json  # type: ignore
        except ImportError as e:
            raise ImportError(
                "Could not import the json_repair package. Please install it with `pip install json_repair`."
            ) from e

        json_str = self.json_str
        remove_control_chars = self.remove_control_chars
        normalize_unicode = self.normalize_unicode
        validate_json = self.validate_json

        try:
            # Keep only the outermost object: everything from the first "{"
            # to the last "}". Surrounding text is discarded.
            start = json_str.find("{")
            end = json_str.rfind("}")
            # `end < start` covers input such as "} {", where both braces
            # exist but do not delimit an object; slicing would otherwise
            # silently yield an empty string.
            if start == -1 or end == -1 or end < start:
                raise ValueError("Invalid JSON string: Missing '{' or '}'")
            json_str = json_str[start : end + 1]

            if remove_control_chars:
                json_str = self._remove_control_characters(json_str)
            if normalize_unicode:
                json_str = self._normalize_unicode(json_str)
            # Validation runs on the text *before* repair, so enabling it
            # rejects any input that is not already strictly valid JSON.
            if validate_json:
                json_str = self._validate_json(json_str)

            cleaned_json_str = repair_json(json_str)
            result = str(cleaned_json_str)

            self.status = result
            return Message(text=result)
        except Exception as e:
            # Every failure is reported as ValueError; chain the original
            # exception so its traceback is preserved.
            raise ValueError(f"Error cleaning JSON string: {e}") from e

    def _remove_control_characters(self, s: str) -> str:
        """Return ``s`` without ASCII control characters.

        Removes U+0000-U+001F and U+007F, which includes tab, newline and
        carriage return.
        """
        return re.sub(r"[\x00-\x1F\x7F]", "", s)

    def _normalize_unicode(self, s: str) -> str:
        """Return ``s`` normalized to Unicode form NFC."""
        return unicodedata.normalize("NFC", s)

    def _validate_json(self, s: str) -> str:
        """Return ``s`` unchanged if ``json.loads`` accepts it.

        Raises:
            ValueError: If ``s`` is not valid JSON.
        """
        try:
            json.loads(s)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON string: {e}") from e
        return s

0 comments on commit c522a7a

Please sign in to comment.