diff --git a/ggshield/verticals/secret/secret_scan_collection.py b/ggshield/verticals/secret/secret_scan_collection.py
index 0736087518..974c7b3f06 100644
--- a/ggshield/verticals/secret/secret_scan_collection.py
+++ b/ggshield/verticals/secret/secret_scan_collection.py
@@ -84,7 +84,8 @@ def from_scan_result(
         cls, file: Scannable, scan_result: ScanResult, secret_config: SecretConfig
     ):
         """Creates a Result from a Scannable and a ScanResult.
-        Removes ignored policy breaks
+        - Removes ignored policy breaks
+        - replace matches by ExtendedMatches
         """
 
         to_keep = []
diff --git a/tests/factories.py b/tests/factories.py
new file mode 100644
index 0000000000..12f9c492a6
--- /dev/null
+++ b/tests/factories.py
@@ -0,0 +1,102 @@
+import random
+
+import factory
+import factory.fuzzy
+from pygitguardian.models import Match, PolicyBreak, ScanResult
+
+from ggshield.core.scan.scannable import StringScannable
+from ggshield.utils.git_shell import Filemode
+
+
+def get_line_index(content, index):
+    """Return the index of the line containing the character at the given index.
+
+    Lines are measured with their line ending, and an index equal to the length
+    of a line still belongs to that line: the index behaves like a 1-based
+    position (or an exclusive end offset). A 0-based offset pointing at the
+    first character of a line therefore resolves to the previous line.
+
+    Raises IndexError if the index is past the end of the content.
+    """
+    remaining = index
+    for line_index, line in enumerate(content.splitlines(keepends=True)):
+        if remaining <= len(line):
+            return line_index
+        # Make the offset relative to the start of the next line
+        remaining -= len(line)
+    raise IndexError(f"index {index} is past the end of the content")
+
+
+class ScannableFactory(factory.Factory):
+    """Build StringScannable objects with a fake hostname URL and fake text."""
+
+    class Meta:
+        model = StringScannable
+
+    url = factory.Faker("hostname")
+    content = factory.Faker("text")
+    # Only returning FILE for new, since diff would need a custom content
+    filemode = Filemode.FILE
+
+
+class MatchFactory(factory.Factory):
+    """Build Match objects covering a random span of `content`.
+
+    Indices, line indices and the matched text are all derived from that span.
+    """
+
+    class Meta:
+        model = Match
+
+    content = factory.Faker("text")
+    match_len = factory.fuzzy.FuzzyInteger(5, 15)
+    index_start = factory.lazy_attribute(
+        lambda obj: random.randint(0, len(obj.content) - obj.match_len)
+    )
+    index_end = factory.lazy_attribute(lambda obj: obj.index_start + obj.match_len)
+    match = factory.lazy_attribute(
+        lambda obj: obj.content[obj.index_start : obj.index_end]
+    )
+    line_start = factory.lazy_attribute(
+        lambda obj: get_line_index(obj.content, obj.index_start)
+    )
+    line_end = factory.lazy_attribute(
+        lambda obj: get_line_index(obj.content, obj.index_end)
+    )
+    match_type = factory.fuzzy.FuzzyText(length=10)
+
+
+class PolicyBreakFactory(factory.Factory):
+    """Build PolicyBreak objects holding `nb_matches` matches over `content`."""
+
+    class Meta:
+        model = PolicyBreak
+
+    break_type = "a"
+    policy = "Secrets detection"
+    validity = "valid"
+    known_secret = False
+    incident_url = None
+    is_excluded = False
+    exclude_reason = None
+    diff_kind = None
+    content = factory.Faker("text")
+    nb_matches = factory.fuzzy.FuzzyInteger(1, 2)
+
+    @factory.lazy_attribute
+    def matches(self):
+        # Note: matches may overlap
+        return [MatchFactory(content=self.content) for _ in range(self.nb_matches)]
+
+
+class ScanResultFactory(factory.Factory):
+    """Build ScanResult objects; `policy_break_count` follows `policy_breaks`."""
+
+    class Meta:
+        model = ScanResult
+
+    policy_break_count = factory.lazy_attribute(lambda obj: len(obj.policy_breaks))
+    # LazyFunction gives every instance its own list instead of a shared one
+    policy_breaks = factory.LazyFunction(list)
+    policies = factory.LazyFunction(lambda: ["Secrets detection"])
+    is_diff = False
diff --git a/tests/test_factories.py b/tests/test_factories.py
new file mode 100644
index 0000000000..811984e379
--- /dev/null
+++ b/tests/test_factories.py
@@ -0,0 +1,28 @@
+import pytest
+
+from tests.factories import get_line_index
+
+
+TEST_CONTENT = """aaa
+bb
+cccc"""
+
+
+@pytest.mark.parametrize(
+    ("index", "expected_line_index"),
+    (
+        (1, 0),
+        (4, 0),
+        (5, 1),
+        (7, 1),
+        (8, 2),
+        (11, 2),
+    ),
+)
+def test_get_line_index(index, expected_line_index):
+    assert get_line_index(TEST_CONTENT, index) == expected_line_index
+
+
+def test_get_line_index_past_the_end():
+    with pytest.raises(IndexError):
+        get_line_index(TEST_CONTENT, len(TEST_CONTENT) + 1)
diff --git a/tests/unit/verticals/secret/test_secret_scan_collection.py b/tests/unit/verticals/secret/test_secret_scan_collection.py
index 3733449bb3..5193aa4413 100644
--- a/tests/unit/verticals/secret/test_secret_scan_collection.py
+++ b/tests/unit/verticals/secret/test_secret_scan_collection.py
@@ -4,10 +4,12 @@
 from pygitguardian.models import ScanResult
 
 from ggshield.core.config.user_config import SecretConfig
+from ggshield.core.filter import get_ignore_sha
 from ggshield.core.scan import StringScannable
 from ggshield.core.types import IgnoredMatch
 from ggshield.verticals.secret import Results
-from ggshield.verticals.secret.secret_scan_collection import Result
+from ggshield.verticals.secret.secret_scan_collection import IgnoreReason, Result
+from tests.factories import PolicyBreakFactory, ScannableFactory, ScanResultFactory
 from tests.unit.conftest import (
     _ONE_LINE_AND_MULTILINE_PATCH_CONTENT,
     _ONE_LINE_AND_MULTILINE_PATCH_SCAN_RESULT,
@@ -79,7 +81,7 @@ def test_results_from_exception():
         ),
     ],
 )
-def test_create_result_remove_ignores(
+def test_create_result_removes_ignored_matches(
     content: str, scan_result: ScanResult, ignores: Iterable, final_len: int
 ) -> None:
     result = Result.from_scan_result(
@@ -92,5 +94,37 @@ def test_create_result_remove_ignores(
     assert len(result.policy_breaks) == final_len
 
 
-def test_ignore_all_secrets():
-    pass
+@pytest.mark.parametrize("all_secrets", (True, False))
+def test_create_result_removes_ignored_matches_bis(all_secrets):
+    """
+    GIVEN two different policy breaks
+    WHEN ignoring the first one
+    THEN it is ignored iff all_secrets is false
+    """
+    scannable = ScannableFactory()
+    policy_breaks = PolicyBreakFactory.create_batch(2, content=scannable.content)
+
+    # ensure policy breaks are different
+    if policy_breaks[0].matches[0].match_type == policy_breaks[1].matches[0].match_type:
+        policy_breaks[0].matches[0].match_type += "a"
+
+    config = SecretConfig(
+        ignored_matches=[
+            IgnoredMatch(name="x", match=get_ignore_sha(policy_breaks[0]))
+        ],
+        all_secrets=all_secrets,
+    )
+    result = Result.from_scan_result(
+        scannable, ScanResultFactory(policy_breaks=policy_breaks), config
+    )
+    if all_secrets:
+        assert len(result.policy_breaks) == 2
+        assert result.policy_breaks[0].is_excluded is True
+        assert result.policy_breaks[1].is_excluded is False
+    else:
+        assert len(result.policy_breaks) == 1
+        assert result.policy_breaks[0].is_excluded is False
+        assert (
+            result.ignored_policy_breaks_count_by_reason[IgnoreReason.IGNORED_MATCH]
+            == 1
+        )