Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: escape directory to prevent \n on Windows directory name to fail on Pathlib + Tests #4101

Merged
merged 5 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/backend/base/langflow/base/data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,19 @@
return path.name.startswith(".")


def format_directory_path(path: str) -> str:
    r"""Escape newline characters in a directory path string.

    Every literal newline character in ``path`` is replaced by the two-character
    sequence ``\n`` (a backslash followed by the letter ``n``). All other
    characters, including backslashes and path separators already present, are
    returned unchanged, so a path that contains no newline comes back as is.

    Args:
        path (str): The input path string.

    Returns:
        str: The path string with each newline character escaped.
    """
    # Only "\n" is rewritten; no other normalisation or validation happens here.
    return path.replace("\n", "\\n")


def retrieve_file_paths(
path: str,
*,
Expand All @@ -52,6 +65,7 @@
depth: int,
types: list[str] = TEXT_FILE_TYPES,
) -> list[str]:
path = format_directory_path(path)
path_obj = Path(path)
if not path_obj.exists() or not path_obj.is_dir():
msg = f"Path {path} must exist and be a directory."
Expand Down
46 changes: 46 additions & 0 deletions src/backend/tests/unit/utils/test_format_directory_path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import pytest
from langflow.base.data.utils import format_directory_path


@pytest.mark.parametrize(
    ("input_path", "expected"),
    [
        # Paths without newlines must come back unchanged.
        pytest.param(
            "/home/user/documents/file.txt",
            "/home/user/documents/file.txt",
            id="posix-no-newline",
        ),
        pytest.param("", "", id="empty"),
        pytest.param(
            "C:\\Users\\Documents\\file.txt",
            "C:\\Users\\Documents\\file.txt",
            id="windows-no-newline",
        ),
        pytest.param(
            "C:\\Users\\Documents\\",
            "C:\\Users\\Documents\\",
            id="windows-trailing-backslash",
        ),
        pytest.param(
            "C:/Users\\Documents/file.txt",
            "C:/Users\\Documents/file.txt",
            id="mixed-separators",
        ),
        pytest.param(
            "\\\\server\\share\\file.txt",
            "\\\\server\\share\\file.txt",
            id="unc-network-path",
        ),
        # Each newline character becomes a literal backslash followed by "n".
        pytest.param(
            "/home/user/docu\nments/file.txt",
            "/home/user/docu\\nments/file.txt",
            id="single-newline",
        ),
        pytest.param(
            "/home/user/\ndocu\nments/file.txt",
            "/home/user/\\ndocu\\nments/file.txt",
            id="multiple-newlines",
        ),
        pytest.param("\n\n\n", "\\n\\n\\n", id="only-newlines"),
        pytest.param(
            "/home/user/my-\ndocs/special_file!.pdf",
            "/home/user/my-\\ndocs/special_file!.pdf",
            id="newline-with-special-characters",
        ),
        # Backslashes already in the input are kept as they are; only the newline is rewritten.
        pytest.param(
            "C:\\Users\\\nDocuments\\file.txt",
            "C:\\Users\\\\nDocuments\\file.txt",
            id="windows-with-newline",
        ),
        pytest.param(
            "/home/user/documents/\n",
            "/home/user/documents/\\n",
            id="trailing-newline",
        ),
        pytest.param(
            "\n/home/user/documents/",
            "\\n/home/user/documents/",
            id="leading-newline",
        ),
        pytest.param(
            "/home/user/docu\n\nments/file.txt",
            "/home/user/docu\\n\\nments/file.txt",
            id="consecutive-newlines",
        ),
    ],
)
def test_format_directory_path(input_path, expected):
    """Check that newlines are escaped and every other character is left untouched."""
    assert format_directory_path(input_path) == expected


# Additional test for type checking
def test_format_directory_path_type():
    """The formatter returns a ``str`` for an ordinary path."""
    formatted = format_directory_path("/home/user/file.txt")
    assert isinstance(formatted, str)
59 changes: 27 additions & 32 deletions src/backend/tests/unit/utils/test_rewrite_file_path.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,38 @@
from langflow.graph.utils import rewrite_file_path
from langflow.base.data.utils import format_directory_path
import pytest


@pytest.mark.parametrize(
"file_path, expected",
"input_path, expected",
[
# Test case 1: Standard path with multiple directories
("/home/user/documents/file.txt", ["documents/file.txt"]),
# Test case 2: Path with only one directory
("/documents/file.txt", ["documents/file.txt"]),
# Test case 3: Path with no directories (just filename)
("file.txt", ["file.txt"]),
# Test case 4: Path with multiple levels and special characters
("/home/user/my-docs/special_file!.pdf", ["my-docs/special_file!.pdf"]),
# Test case 5: Path with trailing slash
("/home/user/documents/", ["user/documents"]),
# Test case 6: Empty path
("", [""]),
# Test case 7: Path with only slashes
("///", [""]),
# Test case 8: Path with dots
("/home/user/../documents/./file.txt", ["./file.txt"]),
# Test case 9: Windows-style path
("C:\\Users\\Documents\\file.txt", ["Documents/file.txt"]),
# Test case 10: Windows path with trailing backslash
("C:\\Users\\Documents\\", ["Users/Documents"]),
# Test case 11: Mixed separators
("C:/Users\\Documents/file.txt", ["Documents/file.txt"]),
# Test case 12: Network path (UNC)
("\\\\server\\share\\file.txt", ["share/file.txt"]),
# Test case 1: Standard path with no newlines
("/home/user/documents/file.txt", "/home/user/documents/file.txt"),
# Test case 2: Path with newline character
("/home/user/docu\nments/file.txt", "/home/user/docu\\nments/file.txt"),
# Test case 3: Path with multiple newline characters
("/home/user/\ndocu\nments/file.txt", "/home/user/\\ndocu\\nments/file.txt"),
# Test case 4: Path with only newline characters
("\n\n\n", "\\n\\n\\n"),
# Test case 5: Empty path
("", ""),
# Test case 6: Path with mixed newlines and other special characters
("/home/user/my-\ndocs/special_file!.pdf", "/home/user/my-\\ndocs/special_file!.pdf"),
# Test case 7: Windows-style path with newline
("C:\\Users\\\nDocuments\\file.txt", "C:\\Users\\\\nDocuments\\file.txt"),
# Test case 8: Path with trailing newline
("/home/user/documents/\n", "/home/user/documents/\\n"),
# Test case 9: Path with leading newline
("\n/home/user/documents/", "\\n/home/user/documents/"),
# Test case 10: Path with multiple consecutive newlines
("/home/user/docu\n\nments/file.txt", "/home/user/docu\\n\\nments/file.txt"),
],
)
def test_rewrite_file_path(file_path, expected):
result = rewrite_file_path(file_path)
def test_format_directory_path(input_path, expected):
result = format_directory_path(input_path)
assert result == expected


# Additional test for type checking
def test_rewrite_file_path_type():
result = rewrite_file_path("/home/user/file.txt")
assert isinstance(result, list)
assert all(isinstance(item, str) for item in result)
def test_format_directory_path_type():
result = format_directory_path("/home/user/file.txt")
assert isinstance(result, str)
Loading