chore: add tests, with factories !

GitGuardian · Dec 2, 2024 · 1047a26 · 1047a26
1 parent c198bb0
commit 1047a26
Show file tree

Hide file tree

Showing 4 changed files with 157 additions and 5 deletions.
diff --git a/ggshield/verticals/secret/secret_scan_collection.py b/ggshield/verticals/secret/secret_scan_collection.py
@@ -84,7 +84,8 @@ def from_scan_result(
         cls, file: Scannable, scan_result: ScanResult, secret_config: SecretConfig
     ):
         """Creates a Result from a Scannable and a ScanResult.
-        Removes ignored policy breaks
+        - Removes ignored policy breaks
+        - Replaces matches with ExtendedMatches
         """
 
         to_keep = []

diff --git a/tests/factories.py b/tests/factories.py
@@ -0,0 +1,94 @@
+import random
+
+import factory
+import factory.fuzzy
+from pygitguardian.models import Match, PolicyBreak, ScanResult
+
+from ggshield.core.scan.scannable import StringScannable
+from ggshield.utils.git_shell import Filemode
+
+
+def get_line_index(content, index):
+    """Return the index of the line containing the character at the given index.
+
+    ``content`` is split into lines with their line endings kept, so the
+    length of a line includes its terminator. Lines are walked in order and
+    the remaining offset is reduced by each line's length until it is lower
+    than or equal to the length of the current line; an offset equal to a
+    line's length therefore still resolves to that line.
+
+    Args:
+        content: The text to search.
+        index: The character offset into ``content``.
+
+    Returns:
+        The zero-based index of the matching line.
+
+    Raises:
+        IndexError: If ``index`` lies beyond the end of ``content``, or if
+            ``content`` is empty.
+    """
+    remaining = index
+    # enumerate() avoids the repeated O(n) `list.pop(0)` of a manual loop
+    for line_index, line in enumerate(content.splitlines(keepends=True)):
+        if remaining <= len(line):
+            return line_index
+        remaining -= len(line)
+    # Every line was consumed without reaching the requested offset
+    raise IndexError(f"index {index} is out of range for the given content")
+
+
+# Multi-line placeholder text (it starts with a newline) used as the default
+# `content` of ScannableFactory.
+LOREM_CONTENT = """
+Lorem Ipsum is simply dummy text of the printing and typesetting industry.
+Lorem Ipsum has been the industry's standard dummy text ever since the 1500s,
+when an unknown printer took a galley of type and scrambled it to make a type
+specimen book. It has survived not only five centuries, but also the leap
+into electronic typesetting, remaining essentially unchanged. It was popularised
+in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages,
+and more recently with desktop publishing software like Aldus PageMaker including
+versions of Lorem Ipsum.
+"""
+
+
+class ScannableFactory(factory.Factory):
+    """Factory building ``StringScannable`` objects.
+
+    Unless overridden, the scannable gets the url "localhost", the
+    ``LOREM_CONTENT`` text as content and the ``Filemode.FILE`` filemode.
+    """
+
+    class Meta:
+        model = StringScannable
+
+    url = "localhost"
+    content = LOREM_CONTENT
+    filemode = Filemode.FILE
+
+
+class MatchFactory(factory.Factory):
+    """Factory building ``Match`` objects located inside a text.
+
+    A random span of ``match_len`` characters is picked in ``content``; the
+    indices, the matched text and the line numbers are all derived from it.
+    """
+
+    class Meta:
+        model = Match
+
+    # Text the match is taken from; random Faker text unless overridden
+    content = factory.Faker("text")
+    # Length of the matched span, between 5 and 15 characters
+    match_len = factory.fuzzy.FuzzyInteger(5, 15)
+    # Start offset, chosen so that the span fits inside `content`
+    # NOTE(review): raises if `content` is shorter than `match_len` - confirm
+    # callers always provide a long enough content
+    index_start = factory.lazy_attribute(
+        lambda obj: random.randint(0, len(obj.content) - obj.match_len)
+    )
+    index_end = factory.lazy_attribute(lambda obj: obj.index_start + obj.match_len)
+    # The matched text is the slice of `content` between the two indices
+    match = factory.lazy_attribute(
+        lambda obj: obj.content[obj.index_start : obj.index_end]
+    )
+    # Line numbers are computed from the offsets with get_line_index()
+    line_start = factory.lazy_attribute(
+        lambda obj: get_line_index(obj.content, obj.index_start)
+    )
+    line_end = factory.lazy_attribute(
+        lambda obj: get_line_index(obj.content, obj.index_end)
+    )
+    # Random 10-character detector name
+    match_type = factory.fuzzy.FuzzyText(length=10)
+
+
+class PolicyBreakFactory(factory.Factory):
+    """Factory building ``PolicyBreak`` objects with generated matches.
+
+    ``nb_matches`` matches (1 or 2 by default) are created with
+    ``MatchFactory``, all taken from the same ``content``.
+    """
+
+    class Meta:
+        model = PolicyBreak
+
+    break_type = "a"
+    policy = "Secrets detection"
+    validity = "valid"
+    known_secret = False
+    incident_url = None
+    is_excluded = False
+    exclude_reason = None
+    diff_kind = None
+    # Text shared by every generated match; random Faker text unless overridden
+    content = factory.Faker("text")
+    # Number of matches to generate
+    nb_matches = factory.fuzzy.FuzzyInteger(1, 2)
+
+    @factory.lazy_attribute
+    def matches(self):
+        """Build ``nb_matches`` matches located in ``content``."""
+        # Note: each match is positioned independently, so matches may overlap
+        return [MatchFactory(content=self.content) for _ in range(self.nb_matches)]
+
+
+class ScanResultFactory(factory.Factory):
+    """Factory building ``ScanResult`` objects.
+
+    By default the scan result has no policy break, a single
+    "Secrets detection" policy and is not a diff. ``policy_break_count`` is
+    derived from the length of ``policy_breaks``.
+    """
+
+    class Meta:
+        model = ScanResult
+
+    policy_break_count = factory.lazy_attribute(lambda obj: len(obj.policy_breaks))
+    # Build the lists lazily so that each generated object gets its own list
+    # instead of sharing one mutable class-level default between all objects.
+    policy_breaks = factory.LazyFunction(list)
+    policies = factory.LazyFunction(lambda: ["Secrets detection"])
+    is_diff = False
diff --git a/tests/test_factories.py b/tests/test_factories.py
@@ -0,0 +1,23 @@
+import pytest
+
+from tests.factories import get_line_index
+
+
+# Three lines of 3, 2 and 4 characters (line terminators excluded); the last
+# line has no trailing newline.
+TEST_CONTENT = """aaa
+bb
+cccc"""
+
+
+@pytest.mark.parametrize(
+    ("index", "expected_line_index"),
+    (
+        (1, 0),
+        (4, 0),
+        (5, 1),
+        (7, 1),
+        (8, 2),
+        (11, 2),
+    ),
+)
+def test_get_line_index(index, expected_line_index):
+    """
+    GIVEN a three-line content
+    WHEN asking for the line index of a character index
+    THEN the expected zero-based line index is returned
+
+    Each line is checked with one index inside it and one index at its end.
+    """
+    assert get_line_index(TEST_CONTENT, index) == expected_line_index
diff --git a/tests/unit/verticals/secret/test_secret_scan_collection.py b/tests/unit/verticals/secret/test_secret_scan_collection.py
@@ -4,10 +4,12 @@
 from pygitguardian.models import ScanResult
 
 from ggshield.core.config.user_config import SecretConfig
+from ggshield.core.filter import get_ignore_sha
 from ggshield.core.scan import StringScannable
 from ggshield.core.types import IgnoredMatch
 from ggshield.verticals.secret import Results
-from ggshield.verticals.secret.secret_scan_collection import Result
+from ggshield.verticals.secret.secret_scan_collection import IgnoreReason, Result
+from tests.factories import PolicyBreakFactory, ScannableFactory, ScanResultFactory
 from tests.unit.conftest import (
     _ONE_LINE_AND_MULTILINE_PATCH_CONTENT,
     _ONE_LINE_AND_MULTILINE_PATCH_SCAN_RESULT,
@@ -79,7 +81,7 @@ def test_results_from_exception():
         ),
     ],
 )
-def test_create_result_remove_ignores(
+def test_create_result_removes_ignored_matches(
     content: str, scan_result: ScanResult, ignores: Iterable, final_len: int
 ) -> None:
     result = Result.from_scan_result(
@@ -92,5 +94,37 @@ def test_create_result_remove_ignores(
     assert len(result.policy_breaks) == final_len
 
 
-def test_ignore_all_secrets():
-    pass
+@pytest.mark.parametrize("all_secrets", (True, False))
+def test_create_result_removes_ignored_matches_bis(all_secrets):
+    """
+    GIVEN two different policy breaks
+    WHEN the first one is listed in the ignored matches
+    THEN it is removed from the result iff all_secrets is false,
+    otherwise it is kept and flagged as excluded
+    """
+    scannable = ScannableFactory()
+    ignored_break, kept_break = PolicyBreakFactory.create_batch(
+        2, content=scannable.content
+    )
+
+    # Make sure the two policy breaks cannot be mistaken for one another
+    first_match = ignored_break.matches[0]
+    if first_match.match_type == kept_break.matches[0].match_type:
+        first_match.match_type += "a"
+
+    ignored_match = IgnoredMatch(name="x", match=get_ignore_sha(ignored_break))
+    config = SecretConfig(ignored_matches=[ignored_match], all_secrets=all_secrets)
+    scan_result = ScanResultFactory(policy_breaks=[ignored_break, kept_break])
+
+    result = Result.from_scan_result(scannable, scan_result, config)
+
+    if not all_secrets:
+        # The ignored policy break is dropped and counted by ignore reason
+        assert len(result.policy_breaks) == 1
+        assert result.policy_breaks[0].is_excluded is False
+        ignored_counts = result.ignored_policy_breaks_count_by_reason
+        assert ignored_counts[IgnoreReason.IGNORED_MATCH] == 1
+    else:
+        # Both policy breaks are kept, only the first one is flagged
+        assert len(result.policy_breaks) == 2
+        assert result.policy_breaks[0].is_excluded is True
+        assert result.policy_breaks[1].is_excluded is False