From f9bb794399fc7ded808b9b531051c0e7a5248195 Mon Sep 17 00:00:00 2001
From: Mathias Millet <mathias.millet@gitguardian.com>
Date: Fri, 29 Nov 2024 17:54:02 +0100
Subject: [PATCH] chore: add tests, with factories !

---
 .../secret/secret_scan_collection.py          |  3 +-
 tests/factories.py                            | 89 +++++++++++++++++
 tests/factory_constants.py                    | 92 +++++++++++++++++
 tests/test_factories.py                       | 23 +++++
 .../secret/test_secret_scan_collection.py     | 98 ++++++++++++++++++-
 5 files changed, 300 insertions(+), 5 deletions(-)
 create mode 100644 tests/factories.py
 create mode 100644 tests/factory_constants.py
 create mode 100644 tests/test_factories.py

diff --git a/ggshield/verticals/secret/secret_scan_collection.py b/ggshield/verticals/secret/secret_scan_collection.py
index 0736087518..974c7b3f06 100644
--- a/ggshield/verticals/secret/secret_scan_collection.py
+++ b/ggshield/verticals/secret/secret_scan_collection.py
@@ -84,7 +84,8 @@ def from_scan_result(
         cls, file: Scannable, scan_result: ScanResult, secret_config: SecretConfig
     ):
         """Creates a Result from a Scannable and a ScanResult.
-        Removes ignored policy breaks
+        - Removes ignored policy breaks
+        - Replaces matches with ExtendedMatches
         """
 
         to_keep = []
diff --git a/tests/factories.py b/tests/factories.py
new file mode 100644
index 0000000000..2bd7ae369f
--- /dev/null
+++ b/tests/factories.py
@@ -0,0 +1,89 @@
+import random
+
+import factory
+import factory.fuzzy
+from pygitguardian.models import Match, PolicyBreak, ScanResult
+
+from ggshield.core.scan.scannable import StringScannable
+from ggshield.utils.git_shell import Filemode
+from tests.factory_constants import DETECTOR_NAMES, MATCH_NAMES
+
+
+def get_line_index(content, index):
+    """Return the index of the line containing the character at the given index"""
+    # NOTE(review): `<=` sends the 0-based offset of a later line's first character
+    # to the previous line; tests/test_factories.py pins this, so it is kept as-is.
+    for line_index, line in enumerate(content.splitlines(keepends=True)):
+        if index <= len(line):
+            return line_index
+        index -= len(line)
+    # No line holds the index (empty content or index past the end)
+    raise IndexError("index is beyond the end of content")
+
+
+class ScannableFactory(factory.Factory):
+    class Meta:
+        model = StringScannable
+
+    url = factory.Faker("hostname")
+    content = factory.Faker("text")
+    # Only returning FILE for now, since a diff would need custom content
+    filemode = Filemode.FILE
+
+
+class MatchFactory(factory.Factory):
+    class Meta:
+        model = Match
+
+    content = factory.Faker("text")  # text the match is sliced from
+    match_len = factory.fuzzy.FuzzyInteger(5, 15)  # match length, in characters
+    index_start = factory.lazy_attribute(
+        lambda obj: random.randint(0, len(obj.content) - obj.match_len)
+    )  # randint raises ValueError if content is shorter than match_len
+    index_end = factory.lazy_attribute(lambda obj: obj.index_start + obj.match_len)
+    match = factory.lazy_attribute(
+        lambda obj: obj.content[obj.index_start : obj.index_end]  # end is exclusive
+    )
+    line_start = factory.lazy_attribute(
+        lambda obj: get_line_index(obj.content, obj.index_start)
+    )  # 0-based line index, as returned by get_line_index
+    line_end = factory.lazy_attribute(
+        lambda obj: get_line_index(obj.content, obj.index_end)
+    )
+    match_type = factory.lazy_attribute(lambda obj: random.choice(MATCH_NAMES))
+
+
+class PolicyBreakFactory(factory.Factory):
+    class Meta:
+        model = PolicyBreak
+
+    break_type = factory.lazy_attribute(lambda obj: random.choice(DETECTOR_NAMES))
+    policy = "Secrets detection"
+    validity = "valid"
+    known_secret = False
+    incident_url = None
+    is_excluded = False
+    exclude_reason = None
+    diff_kind = None
+    content = factory.Faker("text")  # text every generated match is cut from
+    nb_matches = factory.fuzzy.FuzzyInteger(1, 2)  # how many matches to generate
+
+    @factory.lazy_attribute
+    def matches(self):
+        # Note: matches may overlap in content, but random.sample draws without
+        # replacement, so they at least have different match types
+        match_names = random.sample(MATCH_NAMES, self.nb_matches)
+        return [
+            MatchFactory(match_type=match_name, content=self.content)
+            for match_name in match_names
+        ]
+
+
+class ScanResultFactory(factory.Factory):
+    class Meta:
+        model = ScanResult
+
+    policy_break_count = factory.lazy_attribute(lambda obj: len(obj.policy_breaks))
+    policy_breaks = factory.LazyFunction(list)  # fresh list per build, never shared
+    policies = factory.LazyFunction(lambda: ["Secrets detection"])  # fresh list too
+    is_diff = False
diff --git a/tests/factory_constants.py b/tests/factory_constants.py
new file mode 100644
index 0000000000..f5e4fc1df1
--- /dev/null
+++ b/tests/factory_constants.py
@@ -0,0 +1,92 @@
+DETECTOR_NAMES = [  # pool of break_type values picked by PolicyBreakFactory
+    "Basic Auth String",
+    "Generic Password",
+    "JSON Web Token",
+    "Generic Terraform Variable Secret",
+    "Generic Database Assignment",
+    "Company Email Password",
+    "Generic Password",  # NOTE(review): duplicate entry, doubles its pick rate
+    "Base64 Generic High Entropy Secret",
+    "Generic Database Assignment",  # NOTE(review): duplicate entry as well
+    "Generic CLI Option Secret",
+    "Username Password",
+    "Generic High Entropy Secret",
+    "Base64 Basic Authentication",
+    "Bearer Token",
+    "Authentication Tuple",
+    "Typeform API Token",
+    "New Relic API Key",
+    "Pingdom token v3",
+    "Datadog Keys",
+    "Databricks Authentication Token With Hostname",
+    "GitGuardian Public Monitoring API Key",
+    "GitHub Server-to-server Token",
+    "Infracost API Key",
+    "Facebook App Keys",
+    "Firebase Cloud Messaging API Key",
+    "Intercom Token",
+    "Sourcegraph Access Token v1",
+    "Stripe Webhook Secret",
+    "GitLab Token",
+    "New Relic API Service Key",
+    "Eventbrite OAuth2 Token",
+    "Base64 AWS SES Keys",
+    "Doppler API Key",
+    "Heartland API key",
+    "Tailscale Pre-Authentication Key",
+    "Kraken Keys",
+    "Coveralls Repository Token",
+    "Docker Credentials",
+    "Algolia Monitoring Keys",
+    "Grafana Token",
+    "PackageCloud API Token",
+    "Square Access Token",
+    "DigitalOcean Token",
+    "Sourcegraph Access Token v3",
+    "Akamai API Credentials",
+    "Linode Personal Access Token",
+    "Scalr API Access Token",
+    "FullContact Key",
+    "Nylas API Key",
+    "Plaid Access Token",
+]
+MATCH_NAMES = [  # pool of match_type values picked by the test factories
+    "apikey",
+    "client_id",
+    "client_secret",
+    "host",
+    "password",
+    "username",
+    "token",
+    "port",
+    "scheme",
+    "connection_uri",
+    "subdomain",
+    "private_key",
+    "domain",
+    "secret_key",
+    "access_token",
+    "project_id",
+    "cloud_name",
+    "database",
+    "client_token",
+    "secret_token",
+    "tenant_id",
+    "private_key_id",
+    "integration_key",
+    "azure_endpoint",
+    "app_id",
+    "cluster",
+    "pub_key",
+    "sub_key",
+    "environment",
+    "refresh_token",
+    "organization",
+    "session_token",
+    "connection_string",
+    "account",
+    "user",
+    "client_certificate",
+    "client_key",
+    "config_value",
+]
diff --git a/tests/test_factories.py b/tests/test_factories.py
new file mode 100644
index 0000000000..811984e379
--- /dev/null
+++ b/tests/test_factories.py
@@ -0,0 +1,23 @@
+import pytest
+
+from tests.factories import get_line_index
+
+
+TEST_CONTENT = """aaa
+bb
+cccc"""  # three lines, the last one without a trailing newline
+
+
+@pytest.mark.parametrize(
+    ("index", "expected_line_index"),
+    (
+        (1, 0),
+        (4, 0),  # 4 == len("aaa\n"): still resolved to line 0
+        (5, 1),
+        (7, 1),  # 7 == len("aaa\nbb\n"): still resolved to line 1
+        (8, 2),
+        (11, 2),  # 11 == len(TEST_CONTENT)
+    ),
+)
+def test_get_line_index(index, expected_line_index):
+    assert get_line_index(TEST_CONTENT, index) == expected_line_index
diff --git a/tests/unit/verticals/secret/test_secret_scan_collection.py b/tests/unit/verticals/secret/test_secret_scan_collection.py
index 3733449bb3..dfb1f2218e 100644
--- a/tests/unit/verticals/secret/test_secret_scan_collection.py
+++ b/tests/unit/verticals/secret/test_secret_scan_collection.py
@@ -4,10 +4,16 @@
 from pygitguardian.models import ScanResult
 
 from ggshield.core.config.user_config import SecretConfig
+from ggshield.core.filter import get_ignore_sha
 from ggshield.core.scan import StringScannable
 from ggshield.core.types import IgnoredMatch
 from ggshield.verticals.secret import Results
-from ggshield.verticals.secret.secret_scan_collection import Result
+from ggshield.verticals.secret.secret_scan_collection import (
+    IgnoreReason,
+    Result,
+    compute_ignore_reason,
+)
+from tests.factories import PolicyBreakFactory, ScannableFactory, ScanResultFactory
 from tests.unit.conftest import (
     _ONE_LINE_AND_MULTILINE_PATCH_CONTENT,
     _ONE_LINE_AND_MULTILINE_PATCH_SCAN_RESULT,
@@ -79,7 +85,7 @@ def test_results_from_exception():
         ),
     ],
 )
-def test_create_result_remove_ignores(
+def test_create_result_removes_ignored_matches(
     content: str, scan_result: ScanResult, ignores: Iterable, final_len: int
 ) -> None:
     result = Result.from_scan_result(
@@ -92,5 +98,89 @@ def test_create_result_remove_ignores(
     assert len(result.policy_breaks) == final_len
 
 
-def test_ignore_all_secrets():
-    pass
+@pytest.mark.parametrize("all_secrets", (True, False))
+def test_create_result_removes_ignored_matches_bis(all_secrets):
+    """
+    GIVEN two different policy breaks
+    WHEN ignoring the first one
+    THEN it is dropped iff all_secrets is false (otherwise kept and flagged excluded)
+
+    Note: this test could replace the one above
+    """
+    scannable = ScannableFactory()
+    policy_breaks = PolicyBreakFactory.create_batch(2, content=scannable.content)
+
+    # ensure policy breaks are different
+    if policy_breaks[0].matches[0].match_type == policy_breaks[1].matches[0].match_type:
+        policy_breaks[0].matches[0].match_type += "a"
+
+    config = SecretConfig(
+        ignored_matches=[
+            IgnoredMatch(name="x", match=get_ignore_sha(policy_breaks[0]))
+        ],
+        all_secrets=all_secrets,
+    )
+    result = Result.from_scan_result(
+        scannable, ScanResultFactory(policy_breaks=policy_breaks), config
+    )
+    if all_secrets:
+        assert len(result.policy_breaks) == 2
+        assert result.policy_breaks[0].is_excluded is True
+        assert result.policy_breaks[1].is_excluded is False
+    else:
+        assert len(result.policy_breaks) == 1
+        assert result.policy_breaks[0].is_excluded is False
+        assert (
+            result.ignored_policy_breaks_count_by_reason[IgnoreReason.IGNORED_MATCH]
+            == 1
+        )
+
+
+class TestComputeIgnoreReason:
+    def test_ignore_excluded(self):
+        """
+        GIVEN a policy break excluded by the backend
+        WHEN computing the ignore reason
+        THEN it contains the original exclusion reason (and is not None)
+        """
+        policy_break = PolicyBreakFactory(
+            is_excluded=True, exclude_reason="BACKEND_REASON"
+        )
+        assert "BACKEND_REASON" in compute_ignore_reason(policy_break, SecretConfig())
+
+    def test_ignore_ignored_match(self):
+        """
+        GIVEN a policy break matching an ignored sha in config
+        WHEN computing the ignore reason
+        THEN it's not None
+        """
+        policy_break = PolicyBreakFactory()
+        config = SecretConfig(
+            ignored_matches=[
+                IgnoredMatch(name="x", match=get_ignore_sha(policy_break))
+            ],
+        )
+        assert compute_ignore_reason(policy_break, config) is not None
+
+    def test_ignore_ignored_detector(self):
+        """
+        GIVEN a policy break matching an ignored detector in config
+        WHEN computing the ignore reason
+        THEN it's not None
+        """
+        policy_break = PolicyBreakFactory()
+        config = SecretConfig(
+            ignored_detectors=[policy_break.break_type],
+        )
+        assert compute_ignore_reason(policy_break, config) is not None
+
+    @pytest.mark.parametrize("ignore_known", (True, False))
+    def test_known_secret(self, ignore_known):
+        """
+        GIVEN a known policy break
+        WHEN computing the ignore reason
+        THEN it's not None iff ignore_known_secrets is enabled in config
+        """
+        policy_break = PolicyBreakFactory(known_secret=True)
+        config = SecretConfig(ignore_known_secrets=ignore_known)
+        assert (compute_ignore_reason(policy_break, config) is not None) is ignore_known