diff --git a/.github/scripts/commit_prefix_check.py b/.github/scripts/commit_prefix_check.py index 338d00dea9f..fffba6d87ae 100644 --- a/.github/scripts/commit_prefix_check.py +++ b/.github/scripts/commit_prefix_check.py @@ -25,30 +25,64 @@ # ------------------------------------------------ -# Identify expected prefix dynamically from file paths +# Identify expected prefixes dynamically from file paths # ------------------------------------------------ def infer_prefix_from_paths(paths): + """ + Returns: + - prefixes: a set of allowed prefixes (including build:) + - build_optional: True when commit subject does not need to be build: + (i.e., when any real component — lib/tests/plugins/src — is touched) + """ prefixes = set() + component_prefixes = set() + build_seen = False + + for raw in paths: + # Normalize path separators (Windows compatibility) + p = raw.replace(os.sep, "/") + basename = os.path.basename(p) + + # ----- Any CMakeLists.txt → build: candidate ----- + if basename == "CMakeLists.txt": + build_seen = True + + # ----- lib/ → lib: ----- + if p.startswith("lib/"): + component_prefixes.add("lib:") + + # ----- tests/ → tests: ----- + if p.startswith("tests/"): + component_prefixes.add("tests:") - for p in paths: + # ----- plugins// → : ----- if p.startswith("plugins/"): parts = p.split("/") - prefix = parts[1] - prefixes.add(f"{prefix}:") - continue + if len(parts) > 1: + component_prefixes.add(f"{parts[1]}:") + # ----- src/ → flb_xxx.* → xxx: OR src// → : ----- if p.startswith("src/"): filename = os.path.basename(p) if filename.startswith("flb_"): core = filename[4:].split(".")[0] - prefixes.add(f"{core}:") - continue + component_prefixes.add(f"{core}:") + else: + parts = p.split("/") + if len(parts) > 1: + component_prefixes.add(f"{parts[1]}:") - directory = p.split("/")[1] - prefixes.add(f"{directory}:") - continue + # prefixes = component prefixes + build: if needed + prefixes |= component_prefixes + if build_seen: + prefixes.add("build:") - return prefixes + # build_optional: + # True if ANY real component (lib/tests/plugins/src) was modified. + # False only when modifying build system files alone. + build_optional = len(component_prefixes) > 0 + + return prefixes, build_optional # ------------------------------------------------ @@ -84,27 +118,27 @@ def detect_bad_squash(body): # ------------------------------------------------ -# Validate commit per test expectations +# Validate commit based on expected behavior and test rules # ------------------------------------------------ def validate_commit(commit): msg = commit.message.strip() first_line, *rest = msg.split("\n") body = "\n".join(rest) - # Subject must have prefix + # Subject must start with a prefix subject_prefix_match = PREFIX_RE.match(first_line) if not subject_prefix_match: return False, f"Missing prefix in commit subject: '{first_line}'" subject_prefix = subject_prefix_match.group() - # detect_bad_squash must run but - # validate_commit IGNORE bad-squash reason if it was "multiple sign-offs" + # Run squash detection (but ignore multi-signoff errors) bad_squash, reason = detect_bad_squash(body) # If bad squash was caused by prefix lines in body → FAIL # If list of prefix lines in body → FAIL if bad_squash: + # Prefix-like lines are always fatal if "subject-like prefix" in reason: return False, f"Bad squash detected: {reason}" @@ -113,7 +147,7 @@ def validate_commit(commit): # validate_commit ignores multi signoff warnings. pass - # Subject length + # Subject length check if len(first_line) > 80: return False, f"Commit subject too long (>80 chars): '{first_line}'" @@ -122,30 +156,58 @@ def validate_commit(commit): if signoff_count == 0: return False, "Missing Signed-off-by line" - # Determine expected prefix + # Determine expected prefixes + build option flag files = commit.stats.files.keys() - expected = infer_prefix_from_paths(files) + expected, build_optional = infer_prefix_from_paths(files) - # Docs/CI changes + # When no prefix can be inferred (docs/tools), allow anything if len(expected) == 0: return True, "" - # *** TEST EXPECTATION *** - # For mixed components, DO NOT return custom message. - # Instead: same error shape as wrong-prefix case. - if len(expected) > 1: - # Always fail when multiple components are touched (even if prefix matches one) + expected_lower = {p.lower() for p in expected} + subj_lower = subject_prefix.lower() + + # ------------------------------------------------ + # Multiple-component detection + # ------------------------------------------------ + # Treat pure build-related prefixes ("build:", "CMakeLists.txt:") as non-components. + # Additionally, allow lib: to act as an umbrella for lib subcomponents + # (e.g., ripser:, ripser_wrapper:) when subject prefix is lib:. + non_build_prefixes = { + p + for p in expected_lower + if p not in ("build:", "cmakelists.txt:") + } + + # Prefixes that are allowed to cover multiple subcomponents + umbrella_prefixes = {"lib:"} + + # If more than one non-build prefix is inferred AND the subject is not an umbrella + # prefix, require split commits. + if len(non_build_prefixes) > 1 and subj_lower not in umbrella_prefixes: + expected_list = sorted(expected) + expected_str = ", ".join(expected_list) + return False, ( + f"Subject prefix '{subject_prefix}' does not match files changed.\n" + f"Expected one of: {expected_str}" + ) + + # Subject prefix must be one of the expected ones + if subj_lower not in expected_lower: + expected_list = sorted(expected) + expected_str = ", ".join(expected_list) return False, ( f"Subject prefix '{subject_prefix}' does not match files changed.\n" - f"Expected one of: {', '.join(sorted(expected))}" + f"Expected one of: {expected_str}" ) - # Normal prefix mismatch (case-insensitive comparison) - only_expected = next(iter(expected)) - if subject_prefix.lower() != only_expected.lower(): + + # If build is NOT optional and build: exists among expected, + # then subject MUST be build: + if not build_optional and "build:" in expected_lower and subj_lower != "build:": return False, ( f"Subject prefix '{subject_prefix}' does not match files changed.\n" - f"Expected one of: {only_expected}" + f"Expected one of: build:" ) return True, "" diff --git a/.github/scripts/tests/test_commit_lint.py b/.github/scripts/tests/test_commit_lint.py index 438b17c1101..575a7f814b2 100644 --- a/.github/scripts/tests/test_commit_lint.py +++ b/.github/scripts/tests/test_commit_lint.py @@ -34,39 +34,37 @@ def test_infer_prefix_plugin(): When a file is in plugins//, the prefix should be :. This is the most common case for Fluent Bit commits modifying plugins. """ - assert infer_prefix_from_paths(["plugins/out_s3/s3.c"]) == {"out_s3:"} + prefixes, build_optional = infer_prefix_from_paths(["plugins/out_s3/s3.c"]) + assert prefixes == {"out_s3:"} + assert build_optional is True def test_infer_prefix_core_file(): """ Test that core source files with flb_ prefix correctly infer the component name. - - Files like src/flb_router.c should produce prefix "router:" by stripping - the "flb_" prefix and file extension. This handles core library components. """ - assert infer_prefix_from_paths(["src/flb_router.c"]) == {"router:"} + prefixes, build_optional = infer_prefix_from_paths(["src/flb_router.c"]) + assert prefixes == {"router:"} + assert build_optional is True def test_infer_prefix_new_core_file(): """ Test that core files with longer names and numbers are handled correctly. - - Ensures the prefix inference works for files like flb_super_router2.c, - extracting "super_router2:" correctly. This validates the regex handles - underscores and numbers in component names. """ - assert infer_prefix_from_paths(["src/flb_super_router2.c"]) == {"super_router2:"} + prefixes, build_optional = infer_prefix_from_paths(["src/flb_super_router2.c"]) + assert prefixes == {"super_router2:"} + assert build_optional is True def test_infer_multiple_prefixes(): """ Test that multiple files from different components produce multiple prefixes. - - When a commit touches files from different components (e.g., a plugin and - a core file), the inference should return all relevant prefixes. This helps - detect commits that mix multiple subsystems, which should be split. """ - assert infer_prefix_from_paths([ + prefixes, build_optional = infer_prefix_from_paths([ "plugins/in_tail/tail.c", "src/flb_router.c" - ]) == {"in_tail:", "router:"} + ]) + assert prefixes == {"in_tail:", "router:"} + # At least one real component touched → build is optional + assert build_optional is True # ----------------------------------------------------------- @@ -251,12 +249,8 @@ def test_error_bad_squash_detected(): def test_error_multiple_prefixes_inferred_from_files(): """ - Test that commits touching multiple components are rejected. - - When a commit modifies files from different components (e.g., both a plugin - and core code), it should be split into separate commits. This keeps - commits focused and makes reviews easier. The error message should list - all expected prefixes. + Commits touching multiple non-build components are rejected and must be + split into separate commits, even if the subject matches one component. """ commit = make_commit( "in_tail: update handler\n\nSigned-off-by: User", @@ -264,7 +258,8 @@ def test_error_multiple_prefixes_inferred_from_files(): ) ok, msg = validate_commit(commit) assert ok is False - assert "Expected one of:" in msg + assert "does not match files changed" in msg + # ----------------------------------------------------------- @@ -295,77 +290,66 @@ def test_docs_or_ci_changes_allowed(): def test_infer_prefix_empty_file_list(): """ Test that an empty file list returns an empty prefix set. - - Edge case: when no files are provided, the function should return - an empty set rather than raising an error. This handles degenerate cases. """ - assert infer_prefix_from_paths([]) == set() + prefixes, build_optional = infer_prefix_from_paths([]) + assert prefixes == set() + # No components, no CMakeLists → build not optional + assert build_optional is False def test_infer_prefix_src_subdirectory(): """ Test prefix inference for files in src/ subdirectories. - - Files in src/ subdirectories (like src/stream_processor/stream.c) that - don't have the flb_ prefix should use the subdirectory name as the prefix. - This handles organized core code that's not in the root src/ directory. """ - assert infer_prefix_from_paths(["src/stream_processor/stream.c"]) == {"stream_processor:"} + prefixes, build_optional = infer_prefix_from_paths(["src/stream_processor/stream.c"]) + assert prefixes == {"stream_processor:"} + assert build_optional is True def test_infer_prefix_unknown_paths(): """ Test that files outside plugins/ and src/ don't generate prefixes. - - Files in unknown locations (not plugins/ or src/) should not generate - any prefix. This allows commits with only documentation, CI, or other - non-code files to use generic prefixes. """ - assert infer_prefix_from_paths(["random/file.c"]) == set() + prefixes, build_optional = infer_prefix_from_paths(["random/file.c"]) + assert prefixes == set() + assert build_optional is False def test_infer_prefix_multiple_same_plugin(): """ Test that multiple files from the same plugin yield a single prefix. - - When a commit modifies multiple files within the same plugin directory - (e.g., .c, .h, and config files), they should all produce the same prefix. - This ensures commits modifying a plugin's internal structure are valid. """ - assert infer_prefix_from_paths([ + prefixes, build_optional = infer_prefix_from_paths([ "plugins/out_s3/s3.c", "plugins/out_s3/s3.h", "plugins/out_s3/config.c" - ]) == {"out_s3:"} + ]) + assert prefixes == {"out_s3:"} + assert build_optional is True def test_infer_prefix_plugin_with_underscores(): """ Test that plugin names with underscores are handled correctly. - - Plugin names can contain underscores (e.g., out_http). The prefix inference - should preserve these underscores in the generated prefix. """ - assert infer_prefix_from_paths(["plugins/out_http/http.c"]) == {"out_http:"} + prefixes, build_optional = infer_prefix_from_paths(["plugins/out_http/http.c"]) + assert prefixes == {"out_http:"} + assert build_optional is True def test_infer_prefix_core_file_with_numbers(): """ Test that core file names with numbers are handled correctly. - - Core files like flb_http2.c should produce "http2:" (not "http2.c:"). - This validates that numbers in component names are preserved correctly. """ - assert infer_prefix_from_paths(["src/flb_http2.c"]) == {"http2:"} + prefixes, build_optional = infer_prefix_from_paths(["src/flb_http2.c"]) + assert prefixes == {"http2:"} + assert build_optional is True def test_infer_prefix_mixed_known_unknown(): """ Test prefix inference with a mix of known and unknown file paths. - - When a commit contains both files that generate prefixes (plugins/, src/) - and files that don't (docs/, random files), only the known paths should - contribute to the prefix set. Unknown paths are ignored. """ - result = infer_prefix_from_paths([ + prefixes, build_optional = infer_prefix_from_paths([ "plugins/in_tail/tail.c", "random/file.txt" ]) - assert result == {"in_tail:"} + assert prefixes == {"in_tail:"} + assert build_optional is True # ----------------------------------------------------------- @@ -620,12 +604,12 @@ def test_valid_config_file_changes(): def test_error_multiple_prefixes_one_matches(): """ - Test that commits touching multiple components fail even if prefix matches one. + When a commit touches multiple different components (e.g., a plugin and a + core subsystem), the linter requires the commit to be split, even if the + subject prefix matches one of those components. - When a commit modifies files from different components, it should be rejected - even if the commit prefix matches one of the components. The error message - should list all expected prefixes to help the developer split the commit. - This enforces the principle of one logical change per commit. + In this case, both 'in_tail:' and 'router:' are valid inferred prefixes, + so the linter must reject the commit and report all expected prefixes. """ commit = make_commit( "in_tail: update\n\nSigned-off-by: User",