Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 60 additions & 23 deletions codemcp/glob.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,31 @@

import os
import re
from typing import Callable, List
from typing import Callable, List, Optional


def translate_pattern(
pattern: str,
editorconfig_braces: bool = False,
editorconfig_asterisk: bool = False,
editorconfig_double_asterisk: bool = False,
editorconfig: bool = False,
) -> str:
"""
Translate a glob pattern to a regular expression pattern.

Args:
pattern: The glob pattern to translate
editorconfig_braces: Enable editorconfig brace expansion {s1,s2,s3} and {n1..n2}
editorconfig_asterisk: If True, '*' matches any string including path separators
editorconfig_double_asterisk: If True, '**' matches any string (editorconfig behavior)
If False, uses gitignore behavior for '**'
editorconfig: If True, uses editorconfig glob syntax rules:
- Enables brace expansion {s1,s2,s3} and {n1..n2}
- '*' matches any string including path separators
- '**' matches any string
If False, uses gitignore glob syntax rules

Returns:
Regular expression pattern string
"""
# For backward compatibility, set individual features based on the single parameter
editorconfig_braces = editorconfig
editorconfig_asterisk = editorconfig
editorconfig_double_asterisk = editorconfig
i, n = 0, len(pattern)
result = []

Expand Down Expand Up @@ -203,9 +206,7 @@ def translate_pattern(
if "{" in item:
item_pattern = translate_pattern(
item,
editorconfig_braces=True,
editorconfig_asterisk=editorconfig_asterisk,
editorconfig_double_asterisk=editorconfig_double_asterisk,
editorconfig=True,
)
# Remove the anchors (^ and $)
if item_pattern.startswith("^"):
Expand All @@ -226,18 +227,25 @@ def translate_pattern(
return "^" + "".join(result) + "$"


def make_matcher(pattern: str, **kwargs) -> Callable[[str], bool]:
def make_matcher(
pattern: str,
*,
editorconfig: bool = False,
) -> Callable[[str], bool]:
"""
Create a matcher function that matches paths against the given pattern.

Args:
pattern: The glob pattern to match against
**kwargs: Optional features to enable
editorconfig: If True, uses editorconfig glob syntax rules for matching

Returns:
A function that takes a path string and returns True if it matches
"""
regex_pattern = translate_pattern(pattern, **kwargs)
regex_pattern = translate_pattern(
pattern,
editorconfig=editorconfig,
)
regex = re.compile(regex_pattern)

def matcher(path: str) -> bool:
Expand All @@ -246,40 +254,63 @@ def matcher(path: str) -> bool:
return matcher


def match(pattern: str, path: str, **kwargs) -> bool:
def match(
pattern: str,
path: str,
*,
editorconfig: bool = False,
) -> bool:
"""
Test whether a path matches the given pattern.

Args:
pattern: The glob pattern to match against
path: The path to test
**kwargs: Optional features to enable
editorconfig: If True, uses editorconfig glob syntax rules for matching

Returns:
True if the path matches the pattern, False otherwise
"""
matcher = make_matcher(pattern, **kwargs)
matcher = make_matcher(
pattern,
editorconfig=editorconfig,
)
return matcher(path)


def filter(patterns: List[str], paths: List[str], **kwargs) -> List[str]:
def filter(
patterns: List[str],
paths: List[str],
*,
editorconfig: bool = False,
) -> List[str]:
"""
Filter a list of paths to those that match any of the given patterns.

Args:
patterns: List of glob patterns
paths: List of paths to filter
**kwargs: Optional features to enable
editorconfig: If True, uses editorconfig glob syntax rules for matching

Returns:
List of paths that match any of the patterns
"""
matchers = [make_matcher(pattern, **kwargs) for pattern in patterns]
matchers = [
make_matcher(
pattern,
editorconfig=editorconfig,
)
for pattern in patterns
]
return [path for path in paths if any(matcher(path) for matcher in matchers)]


def find(
patterns: List[str], root: str, paths: List[str] = None, **kwargs
patterns: List[str],
root: str,
paths: Optional[List[str]] = None,
*,
editorconfig: bool = False,
) -> List[str]:
"""
Find all files that match any of the given patterns.
Expand All @@ -288,13 +319,19 @@ def find(
patterns: List of glob patterns
root: Root directory to search (used when paths is None)
paths: Optional list of paths to check instead of walking filesystem
**kwargs: Optional features to enable
editorconfig: If True, uses editorconfig glob syntax rules for matching

Returns:
List of paths that match any of the patterns
"""
result = []
matchers = [make_matcher(pattern, **kwargs) for pattern in patterns]
matchers = [
make_matcher(
pattern,
editorconfig=editorconfig,
)
for pattern in patterns
]

if paths is not None:
# Use provided paths instead of walking filesystem
Expand Down
4 changes: 3 additions & 1 deletion codemcp/line_endings.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,9 @@ def check_gitattributes(file_path: str) -> Optional[str]:

# Use glob.match to check if the pattern matches the file
# Git patterns behave like gitignore patterns, so we don't enable editorconfig features
if pattern == "*" or glob_match(pattern, relative_path):
if pattern == "*" or glob_match(
pattern, relative_path, editorconfig=False
):
# Check for text/eol attributes
for attr in attrs:
if attr == "text=auto":
Expand Down
1 change: 0 additions & 1 deletion codemcp/tools/chmod.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/usr/bin/env python3

import logging
import os
import stat
from typing import Any, Literal
Expand Down
5 changes: 2 additions & 3 deletions codemcp/tools/grep.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ async def grep_files(
# Sort matches
# Use asyncio for getting file stats
import asyncio

loop = asyncio.get_event_loop()

# Get file stats asynchronously
Expand All @@ -199,9 +200,7 @@ async def grep_files(
matches_with_stats.sort(key=lambda x: x[0])
else:
# Sort by modification time (newest first), with filename as tiebreaker
matches_with_stats.sort(
key=lambda x: (-(x[1].st_mtime if x[1] else 0), x[0])
)
matches_with_stats.sort(key=lambda x: (-(x[1].st_mtime if x[1] else 0), x[0]))

matches = [match for match, _ in matches_with_stats]

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,4 @@ reportPrivateImportUsage = true
reportUntypedFunctionDecorator = true
reportFunctionMemberAccess = true
reportIncompatibleMethodOverride = true
stubPath = "./stubs"
3 changes: 3 additions & 0 deletions stubs/editorconfig/__init__.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from collections import OrderedDict

def get_properties(filename: str) -> OrderedDict[str, str]: ...
76 changes: 32 additions & 44 deletions tests/test_glob.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,51 +81,47 @@ def test_gitignore_middle_double_asterisk():
def test_editorconfig_braces():
"""Test editorconfig brace expansion."""
# {s1,s2,s3} should match any of the strings
assert glob.match("file.{txt,py}", "file.txt", editorconfig_braces=True)
assert glob.match("file.{txt,py}", "file.py", editorconfig_braces=True)
assert not glob.match("file.{txt,py}", "file.md", editorconfig_braces=True)
assert glob.match("file.{txt,py}", "file.txt", editorconfig=True)
assert glob.match("file.{txt,py}", "file.py", editorconfig=True)
assert not glob.match("file.{txt,py}", "file.md", editorconfig=True)

# {num1..num2} should match any integer in the range
assert glob.match("file{1..3}.txt", "file1.txt", editorconfig_braces=True)
assert glob.match("file{1..3}.txt", "file2.txt", editorconfig_braces=True)
assert glob.match("file{1..3}.txt", "file3.txt", editorconfig_braces=True)
assert not glob.match("file{1..3}.txt", "file4.txt", editorconfig_braces=True)
assert not glob.match("file{1..3}.txt", "file0.txt", editorconfig_braces=True)
assert glob.match("file{1..3}.txt", "file1.txt", editorconfig=True)
assert glob.match("file{1..3}.txt", "file2.txt", editorconfig=True)
assert glob.match("file{1..3}.txt", "file3.txt", editorconfig=True)
assert not glob.match("file{1..3}.txt", "file4.txt", editorconfig=True)
assert not glob.match("file{1..3}.txt", "file0.txt", editorconfig=True)

# Negative ranges
assert glob.match("file{-1..1}.txt", "file-1.txt", editorconfig_braces=True)
assert glob.match("file{-1..1}.txt", "file0.txt", editorconfig_braces=True)
assert glob.match("file{-1..1}.txt", "file1.txt", editorconfig_braces=True)
assert glob.match("file{-1..1}.txt", "file-1.txt", editorconfig=True)
assert glob.match("file{-1..1}.txt", "file0.txt", editorconfig=True)
assert glob.match("file{-1..1}.txt", "file1.txt", editorconfig=True)

# Braces can be nested
assert glob.match("file{a,{b,c}}.txt", "filea.txt", editorconfig_braces=True)
assert glob.match("file{a,{b,c}}.txt", "fileb.txt", editorconfig_braces=True)
assert glob.match("file{a,{b,c}}.txt", "filec.txt", editorconfig_braces=True)
assert glob.match("file{a,{b,c}}.txt", "filea.txt", editorconfig=True)
assert glob.match("file{a,{b,c}}.txt", "fileb.txt", editorconfig=True)
assert glob.match("file{a,{b,c}}.txt", "filec.txt", editorconfig=True)


def test_editorconfig_asterisk():
"""Test editorconfig asterisk behavior."""
# * should match any string including path separators
assert glob.match("*.txt", "file.txt", editorconfig_asterisk=True)
assert glob.match("*.txt", "dir/file.txt", editorconfig_asterisk=True)
assert not glob.match("*.txt", "file.py", editorconfig_asterisk=True)
assert glob.match("*.txt", "file.txt", editorconfig=True)
assert glob.match("*.txt", "dir/file.txt", editorconfig=True)
assert not glob.match("*.txt", "file.py", editorconfig=True)


def test_editorconfig_double_asterisk():
"""Test editorconfig ** behavior."""
# ** should match any string
assert glob.match("**", "file.txt", editorconfig_double_asterisk=True)
assert glob.match("**", "dir/file.txt", editorconfig_double_asterisk=True)
assert glob.match("**", "dir/subdir/file.txt", editorconfig_double_asterisk=True)
assert glob.match("**", "file.txt", editorconfig=True)
assert glob.match("**", "dir/file.txt", editorconfig=True)
assert glob.match("**", "dir/subdir/file.txt", editorconfig=True)

# More specific pattern with **
assert glob.match("a/**/file.txt", "a/file.txt", editorconfig_double_asterisk=True)
assert glob.match(
"a/**/file.txt", "a/b/file.txt", editorconfig_double_asterisk=True
)
assert glob.match(
"a/**/file.txt", "a/b/c/file.txt", editorconfig_double_asterisk=True
)
assert glob.match("a/**/file.txt", "a/file.txt", editorconfig=True)
assert glob.match("a/**/file.txt", "a/b/file.txt", editorconfig=True)
assert glob.match("a/**/file.txt", "a/b/c/file.txt", editorconfig=True)


def test_escaped_characters():
Expand All @@ -144,17 +140,15 @@ def test_escaped_characters():
def test_combined_features():
"""Test combining different pattern features."""
# Combining various features
assert glob.match("**/[a-z]/{file,test}.{txt,py}", "a/file.txt", editorconfig=True)
assert glob.match(
"**/[a-z]/{file,test}.{txt,py}", "a/file.txt", editorconfig_braces=True
)
assert glob.match(
"**/[a-z]/{file,test}.{txt,py}", "x/y/z/test.py", editorconfig_braces=True
"**/[a-z]/{file,test}.{txt,py}", "x/y/z/test.py", editorconfig=True
)
assert not glob.match(
"**/[a-z]/{file,test}.{txt,py}", "1/file.txt", editorconfig_braces=True
"**/[a-z]/{file,test}.{txt,py}", "1/file.txt", editorconfig=True
)
assert not glob.match(
"**/[a-z]/{file,test}.{txt,py}", "a/other.txt", editorconfig_braces=True
"**/[a-z]/{file,test}.{txt,py}", "a/other.txt", editorconfig=True
)


Expand Down Expand Up @@ -253,16 +247,10 @@ def test_complex_patterns():
assert not glob.match("**/a/**/b/**/c.txt", "a/b/d.txt")

# Combinations with editorconfig features
assert glob.match(
"**/{a,b}/**/*.{txt,md}", "a/x/y/file.txt", editorconfig_braces=True
)
assert glob.match("**/{a,b}/**/*.{txt,md}", "b/file.md", editorconfig_braces=True)
assert not glob.match(
"**/{a,b}/**/*.{txt,md}", "c/file.txt", editorconfig_braces=True
)
assert not glob.match(
"**/{a,b}/**/*.{txt,md}", "a/file.py", editorconfig_braces=True
)
assert glob.match("**/{a,b}/**/*.{txt,md}", "a/x/y/file.txt", editorconfig=True)
assert glob.match("**/{a,b}/**/*.{txt,md}", "b/file.md", editorconfig=True)
assert not glob.match("**/{a,b}/**/*.{txt,md}", "c/file.txt", editorconfig=True)
assert not glob.match("**/{a,b}/**/*.{txt,md}", "a/file.py", editorconfig=True)


def test_edge_cases():
Expand All @@ -281,7 +269,7 @@ def test_edge_cases():

# Just double asterisks
assert glob.match("**", "file.txt")
assert glob.match("**", "nested/file.txt", editorconfig_double_asterisk=True)
assert glob.match("**", "nested/file.txt", editorconfig=True)

# Pattern with just a slash
assert glob.match("/", "/")
Expand Down
Loading