From f9bb794399fc7ded808b9b531051c0e7a5248195 Mon Sep 17 00:00:00 2001 From: Mathias Millet Date: Fri, 29 Nov 2024 17:54:02 +0100 Subject: [PATCH] chore: add tests, with factories ! --- .../secret/secret_scan_collection.py | 3 +- tests/factories.py | 89 +++++++++++++++++ tests/factory_constants.py | 92 +++++++++++++++++ tests/test_factories.py | 23 +++++ .../secret/test_secret_scan_collection.py | 98 ++++++++++++++++++- 5 files changed, 300 insertions(+), 5 deletions(-) create mode 100644 tests/factories.py create mode 100644 tests/factory_constants.py create mode 100644 tests/test_factories.py diff --git a/ggshield/verticals/secret/secret_scan_collection.py b/ggshield/verticals/secret/secret_scan_collection.py index 0736087518..974c7b3f06 100644 --- a/ggshield/verticals/secret/secret_scan_collection.py +++ b/ggshield/verticals/secret/secret_scan_collection.py @@ -84,7 +84,8 @@ def from_scan_result( cls, file: Scannable, scan_result: ScanResult, secret_config: SecretConfig ): """Creates a Result from a Scannable and a ScanResult. 
- Removes ignored policy breaks + - Removes ignored policy breaks + - replace matches by ExtendedMatches """ to_keep = [] diff --git a/tests/factories.py b/tests/factories.py new file mode 100644 index 0000000000..2bd7ae369f --- /dev/null +++ b/tests/factories.py @@ -0,0 +1,89 @@ +import random + +import factory +import factory.fuzzy +from pygitguardian.models import Match, PolicyBreak, ScanResult + +from ggshield.core.scan.scannable import StringScannable +from ggshield.utils.git_shell import Filemode +from tests.factory_constants import DETECTOR_NAMES, MATCH_NAMES + + +def get_line_index(content, index): + """Return the index of the line containing the character at the given index""" + current_line_index = 0 + lines = content.splitlines(keepends=True) + while True: + line = lines.pop(0) + if index <= len(line): + return current_line_index + index -= len(line) + current_line_index += 1 + + +class ScannableFactory(factory.Factory): + class Meta: + model = StringScannable + + url = factory.Faker("hostname") + content = factory.Faker("text") + # Only returning FILE for now, since diff would need a custom content + filemode = Filemode.FILE + + +class MatchFactory(factory.Factory): + class Meta: + model = Match + + content = factory.Faker("text") + match_len = factory.fuzzy.FuzzyInteger(5, 15) + index_start = factory.lazy_attribute( + lambda obj: random.randint(0, len(obj.content) - obj.match_len) + ) + index_end = factory.lazy_attribute(lambda obj: obj.index_start + obj.match_len) + match = factory.lazy_attribute( + lambda obj: obj.content[obj.index_start : obj.index_end] + ) + line_start = factory.lazy_attribute( + lambda obj: get_line_index(obj.content, obj.index_start) + ) + line_end = factory.lazy_attribute( + lambda obj: get_line_index(obj.content, obj.index_end) + ) + match_type = factory.lazy_attribute(lambda obj: random.choice(MATCH_NAMES)) + + +class PolicyBreakFactory(factory.Factory): + class Meta: + model = PolicyBreak + + break_type = 
factory.lazy_attribute(lambda obj: random.choice(DETECTOR_NAMES)) + policy = "Secrets detection" + validity = "valid" + known_secret = False + incident_url = None + is_excluded = False + exclude_reason = None + diff_kind = None + content = factory.Faker("text") + nb_matches = factory.fuzzy.FuzzyInteger(1, 2) + + @factory.lazy_attribute + def matches(self): + # Note: matches may overlap, but at least we ensure they + # have different names + match_names = random.sample(MATCH_NAMES, self.nb_matches) + return [ + MatchFactory(match_type=match_name, content=self.content) + for match_name in match_names + ] + + +class ScanResultFactory(factory.Factory): + class Meta: + model = ScanResult + + policy_break_count = factory.lazy_attribute(lambda obj: len(obj.policy_breaks)) + policy_breaks = [] + policies = ["Secrets detection"] + is_diff = False diff --git a/tests/factory_constants.py b/tests/factory_constants.py new file mode 100644 index 0000000000..f5e4fc1df1 --- /dev/null +++ b/tests/factory_constants.py @@ -0,0 +1,92 @@ +DETECTOR_NAMES = [ + "Basic Auth String", + "Generic Password", + "JSON Web Token", + "Generic Terraform Variable Secret", + "Generic Database Assignment", + "Company Email Password", + "Generic Password", + "Base64 Generic High Entropy Secret", + "Generic Database Assignment", + "Generic CLI Option Secret", + "Username Password", + "Generic High Entropy Secret", + "Base64 Basic Authentication", + "Bearer Token", + "Authentication Tuple", + "Typeform API Token", + "New Relic API Key", + "Pingdom token v3", + "Datadog Keys", + "Databricks Authentication Token With Hostname", + "GitGuardian Public Monitoring API Key", + "GitHub Server-to-server Token", + "Infracost API Key", + "Facebook App Keys", + "Firebase Cloud Messaging API Key", + "Intercom Token", + "Sourcegraph Access Token v1", + "Stripe Webhook Secret", + "GitLab Token", + "New Relic API Service Key", + "Eventbrite OAuth2 Token", + "Base64 AWS SES Keys", + "Doppler API Key", + "Heartland API 
key", + "Tailscale Pre-Authentication Key", + "Kraken Keys", + "Coveralls Repository Token", + "Docker Credentials", + "Algolia Monitoring Keys", + "Grafana Token", + "PackageCloud API Token", + "Square Access Token", + "DigitalOcean Token", + "Sourcegraph Access Token v3", + "Akamai API Credentials", + "Linode Personal Access Token", + "Scalr API Access Token", + "FullContact Key", + "Nylas API Key", + "Plaid Access Token", +] +MATCH_NAMES = [ + "apikey", + "client_id", + "client_secret", + "host", + "password", + "username", + "token", + "port", + "scheme", + "connection_uri", + "subdomain", + "private_key", + "domain", + "secret_key", + "access_token", + "project_id", + "cloud_name", + "database", + "client_token", + "secret_token", + "tenant_id", + "private_key_id", + "integration_key", + "azure_endpoint", + "app_id", + "cluster", + "pub_key", + "sub_key", + "environment", + "refresh_token", + "organization", + "session_token", + "connection_string", + "account", + "user", + "client_certificate", + "client_key", + "config_value", +] diff --git a/tests/test_factories.py b/tests/test_factories.py new file mode 100644 index 0000000000..811984e379 --- /dev/null +++ b/tests/test_factories.py @@ -0,0 +1,23 @@ +import pytest + +from tests.factories import get_line_index + + +TEST_CONTENT = """aaa +bb +cccc""" + + +@pytest.mark.parametrize( + ("index", "expected_line_index"), + ( + (1, 0), + (4, 0), + (5, 1), + (7, 1), + (8, 2), + (11, 2), + ), +) +def test_get_line_index(index, expected_line_index): + assert get_line_index(TEST_CONTENT, index) == expected_line_index diff --git a/tests/unit/verticals/secret/test_secret_scan_collection.py b/tests/unit/verticals/secret/test_secret_scan_collection.py index 3733449bb3..dfb1f2218e 100644 --- a/tests/unit/verticals/secret/test_secret_scan_collection.py +++ b/tests/unit/verticals/secret/test_secret_scan_collection.py @@ -4,10 +4,16 @@ from pygitguardian.models import ScanResult from ggshield.core.config.user_config import 
SecretConfig +from ggshield.core.filter import get_ignore_sha from ggshield.core.scan import StringScannable from ggshield.core.types import IgnoredMatch from ggshield.verticals.secret import Results -from ggshield.verticals.secret.secret_scan_collection import Result +from ggshield.verticals.secret.secret_scan_collection import ( + IgnoreReason, + Result, + compute_ignore_reason, +) +from tests.factories import PolicyBreakFactory, ScannableFactory, ScanResultFactory from tests.unit.conftest import ( _ONE_LINE_AND_MULTILINE_PATCH_CONTENT, _ONE_LINE_AND_MULTILINE_PATCH_SCAN_RESULT, @@ -79,7 +85,7 @@ def test_results_from_exception(): ), ], ) -def test_create_result_remove_ignores( +def test_create_result_removes_ignored_matches( content: str, scan_result: ScanResult, ignores: Iterable, final_len: int ) -> None: result = Result.from_scan_result( @@ -92,5 +98,89 @@ def test_create_result_remove_ignores( assert len(result.policy_breaks) == final_len -def test_ignore_all_secrets(): - pass +@pytest.mark.parametrize("all_secrets", (True, False)) +def test_create_result_removes_ignored_matches_bis(all_secrets): + """ + GIVEN two different policy breaks + WHEN ignoring the first one + THEN it is ignored iff all_secrets is false + + Note: this test could replace the one above + """ + scannable = ScannableFactory() + policy_breaks = PolicyBreakFactory.create_batch(2, content=scannable.content) + + # ensure policy breaks are different + if policy_breaks[0].matches[0].match_type == policy_breaks[1].matches[0].match_type: + policy_breaks[0].matches[0].match_type += "a" + + config = SecretConfig( + ignored_matches=[ + IgnoredMatch(name="x", match=get_ignore_sha(policy_breaks[0])) + ], + all_secrets=all_secrets, + ) + result = Result.from_scan_result( + scannable, ScanResultFactory(policy_breaks=policy_breaks), config + ) + if all_secrets: + assert len(result.policy_breaks) == 2 + assert result.policy_breaks[0].is_excluded is True + assert result.policy_breaks[1].is_excluded is 
False + else: + assert len(result.policy_breaks) == 1 + assert result.policy_breaks[0].is_excluded is False + assert ( + result.ignored_policy_breaks_count_by_reason[IgnoreReason.IGNORED_MATCH] + == 1 + ) + + +class TestComputeIgnoreReason: + def test_ignore_excluded(self): + """ + GIVEN a policy break excluded from the backend + WHEN computing the ignore reason + THEN it contains the original exclusion reason (and is not None) + """ + policy_break = PolicyBreakFactory( + is_excluded=True, exclude_reason="BACKEND_REASON" + ) + assert "BACKEND_REASON" in compute_ignore_reason(policy_break, SecretConfig()) + + def test_ignore_ignored_match(self): + """ + GIVEN a policy break matching an ignored sha in config + WHEN computing the ignore reason + THEN it's not None + """ + policy_break = PolicyBreakFactory() + config = SecretConfig( + ignored_matches=[ + IgnoredMatch(name="x", match=get_ignore_sha(policy_break)) + ], + ) + assert compute_ignore_reason(policy_break, config) is not None + + def test_ignore_ignored_detector(self): + """ + GIVEN a policy break matching an ignored detector in config + WHEN computing the ignore reason + THEN it's not None + """ + policy_break = PolicyBreakFactory() + config = SecretConfig( + ignored_detectors=[policy_break.break_type], + ) + assert compute_ignore_reason(policy_break, config) is not None + + @pytest.mark.parametrize("ignore_known", (True, False)) + def test_known_secret(self, ignore_known): + """ + GIVEN a known policy break + WHEN computing the ignore reason + THEN it's not None iff ignore_known_secrets is enabled in config + """ + policy_break = PolicyBreakFactory(known_secret=True) + config = SecretConfig(ignore_known_secrets=ignore_known) + assert (compute_ignore_reason(policy_break, config) is not None) is ignore_known