Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 91 additions & 29 deletions .github/scripts/commit_prefix_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,30 +25,64 @@


# ------------------------------------------------
# Identify expected prefix dynamically from file paths
# Identify expected prefixes dynamically from file paths
# ------------------------------------------------
def infer_prefix_from_paths(paths):
"""
Returns:
- prefixes: a set of allowed prefixes (including build:)
- build_optional: True when commit subject does not need to be build:
(i.e., when any real component — lib/tests/plugins/src — is touched)
"""
prefixes = set()
component_prefixes = set()
build_seen = False

for raw in paths:
# Normalize path separators (Windows compatibility)
p = raw.replace(os.sep, "/")
basename = os.path.basename(p)

# ----- Any CMakeLists.txt → build: candidate -----
if basename == "CMakeLists.txt":
build_seen = True

# ----- lib/ → lib: -----
if p.startswith("lib/"):
component_prefixes.add("lib:")

# ----- tests/ → tests: -----
if p.startswith("tests/"):
component_prefixes.add("tests:")

for p in paths:
# ----- plugins/<name>/ → <name>: -----
if p.startswith("plugins/"):
parts = p.split("/")
prefix = parts[1]
prefixes.add(f"{prefix}:")
continue
if len(parts) > 1:
component_prefixes.add(f"{parts[1]}:")

# ----- src/ → flb_xxx.* → xxx: OR src/<dir>/ → <dir>: -----
if p.startswith("src/"):
filename = os.path.basename(p)
if filename.startswith("flb_"):
core = filename[4:].split(".")[0]
prefixes.add(f"{core}:")
continue
component_prefixes.add(f"{core}:")
else:
parts = p.split("/")
if len(parts) > 1:
component_prefixes.add(f"{parts[1]}:")

directory = p.split("/")[1]
prefixes.add(f"{directory}:")
continue
# prefixes = component prefixes + build: if needed
prefixes |= component_prefixes
if build_seen:
prefixes.add("build:")

return prefixes
# build_optional:
# True if ANY real component (lib/tests/plugins/src) was modified.
# False only when modifying build system files alone.
build_optional = len(component_prefixes) > 0

return prefixes, build_optional


# ------------------------------------------------
Expand Down Expand Up @@ -84,27 +118,27 @@ def detect_bad_squash(body):


# ------------------------------------------------
# Validate commit per test expectations
# Validate commit based on expected behavior and test rules
# ------------------------------------------------
def validate_commit(commit):
msg = commit.message.strip()
first_line, *rest = msg.split("\n")
body = "\n".join(rest)

# Subject must have prefix
# Subject must start with a prefix
subject_prefix_match = PREFIX_RE.match(first_line)
if not subject_prefix_match:
return False, f"Missing prefix in commit subject: '{first_line}'"

subject_prefix = subject_prefix_match.group()

# detect_bad_squash must run but
# validate_commit IGNORE bad-squash reason if it was "multiple sign-offs"
# Run squash detection (but ignore multi-signoff errors)
bad_squash, reason = detect_bad_squash(body)

# If bad squash was caused by prefix lines in body → FAIL
# If list of prefix lines in body → FAIL
if bad_squash:
# Prefix-like lines are always fatal
if "subject-like prefix" in reason:
return False, f"Bad squash detected: {reason}"

Expand All @@ -113,7 +147,7 @@ def validate_commit(commit):
# validate_commit ignores multi signoff warnings.
pass

# Subject length
# Subject length check
if len(first_line) > 80:
return False, f"Commit subject too long (>80 chars): '{first_line}'"

Expand All @@ -122,30 +156,58 @@ def validate_commit(commit):
if signoff_count == 0:
return False, "Missing Signed-off-by line"

# Determine expected prefix
# Determine expected prefixes + build option flag
files = commit.stats.files.keys()
expected = infer_prefix_from_paths(files)
expected, build_optional = infer_prefix_from_paths(files)

# Docs/CI changes
# When no prefix can be inferred (docs/tools), allow anything
if len(expected) == 0:
return True, ""

# *** TEST EXPECTATION ***
# For mixed components, DO NOT return custom message.
# Instead: same error shape as wrong-prefix case.
if len(expected) > 1:
# Always fail when multiple components are touched (even if prefix matches one)
expected_lower = {p.lower() for p in expected}
subj_lower = subject_prefix.lower()

# ------------------------------------------------
# Multiple-component detection
# ------------------------------------------------
# Treat pure build-related prefixes ("build:", "CMakeLists.txt:") as non-components.
# Additionally, allow lib: to act as an umbrella for lib subcomponents
# (e.g., ripser:, ripser_wrapper:) when subject prefix is lib:.
non_build_prefixes = {
p
for p in expected_lower
if p not in ("build:", "cmakelists.txt:")
}

# Prefixes that are allowed to cover multiple subcomponents
umbrella_prefixes = {"lib:"}

# If more than one non-build prefix is inferred AND the subject is not an umbrella
# prefix, require split commits.
if len(non_build_prefixes) > 1 and subj_lower not in umbrella_prefixes:
expected_list = sorted(expected)
expected_str = ", ".join(expected_list)
return False, (
f"Subject prefix '{subject_prefix}' does not match files changed.\n"
f"Expected one of: {expected_str}"
)

# Subject prefix must be one of the expected ones
if subj_lower not in expected_lower:
expected_list = sorted(expected)
expected_str = ", ".join(expected_list)
return False, (
f"Subject prefix '{subject_prefix}' does not match files changed.\n"
f"Expected one of: {', '.join(sorted(expected))}"
f"Expected one of: {expected_str}"
)

# Normal prefix mismatch (case-insensitive comparison)
only_expected = next(iter(expected))
if subject_prefix.lower() != only_expected.lower():

# If build is NOT optional and build: exists among expected,
# then subject MUST be build:
if not build_optional and "build:" in expected_lower and subj_lower != "build:":
return False, (
f"Subject prefix '{subject_prefix}' does not match files changed.\n"
f"Expected one of: {only_expected}"
f"Expected one of: build:"
)

return True, ""
Expand Down
108 changes: 46 additions & 62 deletions .github/scripts/tests/test_commit_lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,39 +34,37 @@ def test_infer_prefix_plugin():
When a file is in plugins/<name>/, the prefix should be <name>:.
This is the most common case for Fluent Bit commits modifying plugins.
"""
assert infer_prefix_from_paths(["plugins/out_s3/s3.c"]) == {"out_s3:"}
prefixes, build_optional = infer_prefix_from_paths(["plugins/out_s3/s3.c"])
assert prefixes == {"out_s3:"}
assert build_optional is True

def test_infer_prefix_core_file():
"""
Test that core source files with flb_ prefix correctly infer the component name.

Files like src/flb_router.c should produce prefix "router:" by stripping
the "flb_" prefix and file extension. This handles core library components.
"""
assert infer_prefix_from_paths(["src/flb_router.c"]) == {"router:"}
prefixes, build_optional = infer_prefix_from_paths(["src/flb_router.c"])
assert prefixes == {"router:"}
assert build_optional is True

def test_infer_prefix_new_core_file():
"""
Test that core files with longer names and numbers are handled correctly.

Ensures the prefix inference works for files like flb_super_router2.c,
extracting "super_router2:" correctly. This validates the regex handles
underscores and numbers in component names.
"""
assert infer_prefix_from_paths(["src/flb_super_router2.c"]) == {"super_router2:"}
prefixes, build_optional = infer_prefix_from_paths(["src/flb_super_router2.c"])
assert prefixes == {"super_router2:"}
assert build_optional is True

def test_infer_multiple_prefixes():
"""
Test that multiple files from different components produce multiple prefixes.

When a commit touches files from different components (e.g., a plugin and
a core file), the inference should return all relevant prefixes. This helps
detect commits that mix multiple subsystems, which should be split.
"""
assert infer_prefix_from_paths([
prefixes, build_optional = infer_prefix_from_paths([
"plugins/in_tail/tail.c",
"src/flb_router.c"
]) == {"in_tail:", "router:"}
])
assert prefixes == {"in_tail:", "router:"}
# At least one real component touched → build is optional
assert build_optional is True


# -----------------------------------------------------------
Expand Down Expand Up @@ -251,20 +249,17 @@ def test_error_bad_squash_detected():

def test_error_multiple_prefixes_inferred_from_files():
"""
Test that commits touching multiple components are rejected.

When a commit modifies files from different components (e.g., both a plugin
and core code), it should be split into separate commits. This keeps
commits focused and makes reviews easier. The error message should list
all expected prefixes.
Commits touching multiple non-build components are rejected and must be
split into separate commits, even if the subject matches one component.
"""
commit = make_commit(
"in_tail: update handler\n\nSigned-off-by: User",
["plugins/in_tail/tail.c", "src/flb_router.c"]
)
ok, msg = validate_commit(commit)
assert ok is False
assert "Expected one of:" in msg
assert "does not match files changed" in msg



# -----------------------------------------------------------
Expand Down Expand Up @@ -295,77 +290,66 @@ def test_docs_or_ci_changes_allowed():
def test_infer_prefix_empty_file_list():
"""
Test that an empty file list returns an empty prefix set.

Edge case: when no files are provided, the function should return
an empty set rather than raising an error. This handles degenerate cases.
"""
assert infer_prefix_from_paths([]) == set()
prefixes, build_optional = infer_prefix_from_paths([])
assert prefixes == set()
# No components, no CMakeLists → build not optional
assert build_optional is False

def test_infer_prefix_src_subdirectory():
"""
Test prefix inference for files in src/ subdirectories.

Files in src/ subdirectories (like src/stream_processor/stream.c) that
don't have the flb_ prefix should use the subdirectory name as the prefix.
This handles organized core code that's not in the root src/ directory.
"""
assert infer_prefix_from_paths(["src/stream_processor/stream.c"]) == {"stream_processor:"}
prefixes, build_optional = infer_prefix_from_paths(["src/stream_processor/stream.c"])
assert prefixes == {"stream_processor:"}
assert build_optional is True

def test_infer_prefix_unknown_paths():
"""
Test that files outside plugins/ and src/ don't generate prefixes.

Files in unknown locations (not plugins/ or src/) should not generate
any prefix. This allows commits with only documentation, CI, or other
non-code files to use generic prefixes.
"""
assert infer_prefix_from_paths(["random/file.c"]) == set()
prefixes, build_optional = infer_prefix_from_paths(["random/file.c"])
assert prefixes == set()
assert build_optional is False

def test_infer_prefix_multiple_same_plugin():
"""
Test that multiple files from the same plugin yield a single prefix.

When a commit modifies multiple files within the same plugin directory
(e.g., .c, .h, and config files), they should all produce the same prefix.
This ensures commits modifying a plugin's internal structure are valid.
"""
assert infer_prefix_from_paths([
prefixes, build_optional = infer_prefix_from_paths([
"plugins/out_s3/s3.c",
"plugins/out_s3/s3.h",
"plugins/out_s3/config.c"
]) == {"out_s3:"}
])
assert prefixes == {"out_s3:"}
assert build_optional is True

def test_infer_prefix_plugin_with_underscores():
"""
Test that plugin names with underscores are handled correctly.

Plugin names can contain underscores (e.g., out_http). The prefix inference
should preserve these underscores in the generated prefix.
"""
assert infer_prefix_from_paths(["plugins/out_http/http.c"]) == {"out_http:"}
prefixes, build_optional = infer_prefix_from_paths(["plugins/out_http/http.c"])
assert prefixes == {"out_http:"}
assert build_optional is True

def test_infer_prefix_core_file_with_numbers():
"""
Test that core file names with numbers are handled correctly.

Core files like flb_http2.c should produce "http2:" (not "http2.c:").
This validates that numbers in component names are preserved correctly.
"""
assert infer_prefix_from_paths(["src/flb_http2.c"]) == {"http2:"}
prefixes, build_optional = infer_prefix_from_paths(["src/flb_http2.c"])
assert prefixes == {"http2:"}
assert build_optional is True

def test_infer_prefix_mixed_known_unknown():
"""
Test prefix inference with a mix of known and unknown file paths.

When a commit contains both files that generate prefixes (plugins/, src/)
and files that don't (docs/, random files), only the known paths should
contribute to the prefix set. Unknown paths are ignored.
"""
result = infer_prefix_from_paths([
prefixes, build_optional = infer_prefix_from_paths([
"plugins/in_tail/tail.c",
"random/file.txt"
])
assert result == {"in_tail:"}
assert prefixes == {"in_tail:"}
assert build_optional is True


# -----------------------------------------------------------
Expand Down Expand Up @@ -620,12 +604,12 @@ def test_valid_config_file_changes():

def test_error_multiple_prefixes_one_matches():
"""
Test that commits touching multiple components fail even if prefix matches one.
When a commit touches multiple different components (e.g., a plugin and a
core subsystem), the linter requires the commit to be split, even if the
subject prefix matches one of those components.

When a commit modifies files from different components, it should be rejected
even if the commit prefix matches one of the components. The error message
should list all expected prefixes to help the developer split the commit.
This enforces the principle of one logical change per commit.
In this case, both 'in_tail:' and 'router:' are valid inferred prefixes,
so the linter must reject the commit and report all expected prefixes.
"""
commit = make_commit(
"in_tail: update\n\nSigned-off-by: User",
Expand Down
Loading