Skip to content

Commit

Permalink
chore: add tests, with factories !
Browse files Browse the repository at this point in the history
  • Loading branch information
gg-mmill committed Dec 2, 2024
1 parent c198bb0 commit f9bb794
Show file tree
Hide file tree
Showing 5 changed files with 300 additions and 5 deletions.
3 changes: 2 additions & 1 deletion ggshield/verticals/secret/secret_scan_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ def from_scan_result(
cls, file: Scannable, scan_result: ScanResult, secret_config: SecretConfig
):
"""Creates a Result from a Scannable and a ScanResult.
Removes ignored policy breaks
- Removes ignored policy breaks
- replace matches by ExtendedMatches
"""

to_keep = []
Expand Down
89 changes: 89 additions & 0 deletions tests/factories.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import random

import factory
import factory.fuzzy
from pygitguardian.models import Match, PolicyBreak, ScanResult

from ggshield.core.scan.scannable import StringScannable
from ggshield.utils.git_shell import Filemode
from tests.factory_constants import DETECTOR_NAMES, MATCH_NAMES


def get_line_index(content, index):
    """Return the 0-based index of the line of `content` holding offset `index`.

    Lines are split with their terminators kept, so the length of a line
    includes its newline character. An offset equal to the cumulated length
    of the lines seen so far is still attributed to the last of these lines:
    for "aaa\\nbb", offsets 0 to 4 map to line 0 and offsets 5 and 6 map to
    line 1. This is the convention needed for exclusive end offsets.

    Raises:
        IndexError: if `index` lies beyond the end of `content` (this
            includes any offset when `content` is empty).
    """
    remaining = index
    # enumerate() replaces the manual counter and the O(n) `list.pop(0)` calls
    for line_index, line in enumerate(content.splitlines(keepends=True)):
        if remaining <= len(line):
            return line_index
        remaining -= len(line)
    # Same exception type as the former `pop` on an empty list, with a
    # message that tells what went wrong.
    raise IndexError(
        f"index {index} is out of range for content of length {len(content)}"
    )


class ScannableFactory(factory.Factory):
    """Factory building `StringScannable` objects with a fake url and fake text content."""

    class Meta:
        model = StringScannable

    # Fake hostname generated by Faker
    url = factory.Faker("hostname")
    # Fake text generated by Faker
    content = factory.Faker("text")
    # Only returning FILE for now, since diff would need a custom content
    filemode = Filemode.FILE


class MatchFactory(factory.Factory):
    """Factory building a `Match` cut out of a piece of text.

    `content` and `match_len` drive the other attributes: a random span of
    `match_len` characters is picked in `content`, and the match value,
    offsets and line indexes are all derived from that span.
    """

    class Meta:
        model = Match

    content = factory.Faker("text")
    match_len = factory.fuzzy.FuzzyInteger(5, 15)

    @factory.lazy_attribute
    def index_start(self):
        # Highest start offset leaving room for `match_len` characters
        last_valid_start = len(self.content) - self.match_len
        return random.randint(0, last_valid_start)

    @factory.lazy_attribute
    def index_end(self):
        return self.index_start + self.match_len

    @factory.lazy_attribute
    def match(self):
        return self.content[self.index_start : self.index_end]

    @factory.lazy_attribute
    def line_start(self):
        return get_line_index(self.content, self.index_start)

    @factory.lazy_attribute
    def line_end(self):
        return get_line_index(self.content, self.index_end)

    @factory.lazy_attribute
    def match_type(self):
        return random.choice(MATCH_NAMES)


class PolicyBreakFactory(factory.Factory):
    """Factory building a `PolicyBreak` with one or two matches.

    The break type is drawn at random from DETECTOR_NAMES, and the matches
    are built by MatchFactory from the `content` of the policy break.
    """

    class Meta:
        model = PolicyBreak

    @factory.lazy_attribute
    def break_type(self):
        return random.choice(DETECTOR_NAMES)

    policy = "Secrets detection"
    validity = "valid"
    known_secret = False
    incident_url = None
    is_excluded = False
    exclude_reason = None
    diff_kind = None
    content = factory.Faker("text")
    nb_matches = factory.fuzzy.FuzzyInteger(1, 2)

    @factory.lazy_attribute
    def matches(self):
        # Note: the spans of the matches may overlap, but sampling the names
        # without replacement guarantees each match has a distinct name
        built_matches = []
        for name in random.sample(MATCH_NAMES, self.nb_matches):
            built_matches.append(MatchFactory(match_type=name, content=self.content))
        return built_matches


class ScanResultFactory(factory.Factory):
    """Factory building `ScanResult` objects, by default without any policy break.

    Pass `policy_breaks=[...]` to build a result holding policy breaks;
    `policy_break_count` follows automatically.
    """

    class Meta:
        model = ScanResult

    # Derived from `policy_breaks` so that both stay consistent when the
    # caller overrides the list
    policy_break_count = factory.lazy_attribute(lambda obj: len(obj.policy_breaks))
    # Build a fresh list for each object: a list literal declared here is a
    # single object, handed to every instance created with the defaults.
    policy_breaks = factory.LazyFunction(list)
    policies = factory.LazyFunction(lambda: ["Secrets detection"])
    is_diff = False
92 changes: 92 additions & 0 deletions tests/factory_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Detector names used as random policy break types by the test factories.
# Each name appears exactly once so that a random pick is not biased.
DETECTOR_NAMES = [
    "Basic Auth String",
    "Generic Password",
    "JSON Web Token",
    "Generic Terraform Variable Secret",
    "Generic Database Assignment",
    "Company Email Password",
    "Base64 Generic High Entropy Secret",
    "Generic CLI Option Secret",
    "Username Password",
    "Generic High Entropy Secret",
    "Base64 Basic Authentication",
    "Bearer Token",
    "Authentication Tuple",
    "Typeform API Token",
    "New Relic API Key",
    "Pingdom token v3",
    "Datadog Keys",
    "Databricks Authentication Token With Hostname",
    "GitGuardian Public Monitoring API Key",
    "GitHub Server-to-server Token",
    "Infracost API Key",
    "Facebook App Keys",
    "Firebase Cloud Messaging API Key",
    "Intercom Token",
    "Sourcegraph Access Token v1",
    "Stripe Webhook Secret",
    "GitLab Token",
    "New Relic API Service Key",
    "Eventbrite OAuth2 Token",
    "Base64 AWS SES Keys",
    "Doppler API Key",
    "Heartland API key",
    "Tailscale Pre-Authentication Key",
    "Kraken Keys",
    "Coveralls Repository Token",
    "Docker Credentials",
    "Algolia Monitoring Keys",
    "Grafana Token",
    "PackageCloud API Token",
    "Square Access Token",
    "DigitalOcean Token",
    "Sourcegraph Access Token v3",
    "Akamai API Credentials",
    "Linode Personal Access Token",
    "Scalr API Access Token",
    "FullContact Key",
    "Nylas API Key",
    "Plaid Access Token",
]
# Match names used as random match types by the test factories.
# Keep the entries unique: the factories sample this list to give the
# matches of a policy break distinct names.
MATCH_NAMES = [
    "apikey",
    "client_id",
    "client_secret",
    "host",
    "password",
    "username",
    "token",
    "port",
    "scheme",
    "connection_uri",
    "subdomain",
    "private_key",
    "domain",
    "secret_key",
    "access_token",
    "project_id",
    "cloud_name",
    "database",
    "client_token",
    "secret_token",
    "tenant_id",
    "private_key_id",
    "integration_key",
    "azure_endpoint",
    "app_id",
    "cluster",
    "pub_key",
    "sub_key",
    "environment",
    "refresh_token",
    "organization",
    "session_token",
    "connection_string",
    "account",
    "user",
    "client_certificate",
    "client_key",
    "config_value",
]
23 changes: 23 additions & 0 deletions tests/test_factories.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import pytest

from tests.factories import get_line_index


# Three lines of 3, 2 and 4 characters; only the first two end with a newline
TEST_CONTENT = "aaa\nbb\ncccc"


@pytest.mark.parametrize(
    "index, expected_line_index",
    [
        (1, 0),
        (4, 0),
        (5, 1),
        (7, 1),
        (8, 2),
        (11, 2),
    ],
)
def test_get_line_index(index, expected_line_index):
    """Check the line index returned for offsets on both sides of each line break."""
    line_index = get_line_index(TEST_CONTENT, index)
    assert line_index == expected_line_index
98 changes: 94 additions & 4 deletions tests/unit/verticals/secret/test_secret_scan_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,16 @@
from pygitguardian.models import ScanResult

from ggshield.core.config.user_config import SecretConfig
from ggshield.core.filter import get_ignore_sha
from ggshield.core.scan import StringScannable
from ggshield.core.types import IgnoredMatch
from ggshield.verticals.secret import Results
from ggshield.verticals.secret.secret_scan_collection import Result
from ggshield.verticals.secret.secret_scan_collection import (
IgnoreReason,
Result,
compute_ignore_reason,
)
from tests.factories import PolicyBreakFactory, ScannableFactory, ScanResultFactory
from tests.unit.conftest import (
_ONE_LINE_AND_MULTILINE_PATCH_CONTENT,
_ONE_LINE_AND_MULTILINE_PATCH_SCAN_RESULT,
Expand Down Expand Up @@ -79,7 +85,7 @@ def test_results_from_exception():
),
],
)
def test_create_result_remove_ignores(
def test_create_result_removes_ignored_matches(
content: str, scan_result: ScanResult, ignores: Iterable, final_len: int
) -> None:
result = Result.from_scan_result(
Expand All @@ -92,5 +98,89 @@ def test_create_result_remove_ignores(
assert len(result.policy_breaks) == final_len


def test_ignore_all_secrets():
pass
@pytest.mark.parametrize("all_secrets", (True, False))
def test_create_result_removes_ignored_matches_bis(all_secrets):
    """
    GIVEN two different policy breaks
    WHEN the sha of the first one is listed in the ignored matches
    THEN it is dropped from the result iff all_secrets is false, otherwise it
    is kept and flagged as excluded
    Note: this test could replace the one above
    """
    scannable = ScannableFactory()
    policy_breaks = PolicyBreakFactory.create_batch(2, content=scannable.content)
    first_break, second_break = policy_breaks

    # ensure policy breaks are different
    first_match = first_break.matches[0]
    if first_match.match_type == second_break.matches[0].match_type:
        first_match.match_type = first_match.match_type + "a"

    ignored_match = IgnoredMatch(name="x", match=get_ignore_sha(first_break))
    config = SecretConfig(ignored_matches=[ignored_match], all_secrets=all_secrets)
    scan_result = ScanResultFactory(policy_breaks=policy_breaks)

    result = Result.from_scan_result(scannable, scan_result, config)

    if not all_secrets:
        # The ignored policy break is removed and counted by ignore reason
        assert len(result.policy_breaks) == 1
        assert result.policy_breaks[0].is_excluded is False
        ignored_counts = result.ignored_policy_breaks_count_by_reason
        assert ignored_counts[IgnoreReason.IGNORED_MATCH] == 1
        return

    # Both policy breaks are kept, only the ignored one is flagged
    assert len(result.policy_breaks) == 2
    assert result.policy_breaks[0].is_excluded is True
    assert result.policy_breaks[1].is_excluded is False


class TestComputeIgnoreReason:
    """Tests of `compute_ignore_reason` for each way a policy break can be ignored."""

    def test_ignore_excluded(self):
        """
        GIVEN a policy break excluded by the backend
        WHEN computing the ignore reason
        THEN it contains the original exclusion reason (and is not None)
        """
        excluded_break = PolicyBreakFactory(
            is_excluded=True, exclude_reason="BACKEND_REASON"
        )
        reason = compute_ignore_reason(excluded_break, SecretConfig())
        assert "BACKEND_REASON" in reason

    def test_ignore_ignored_match(self):
        """
        GIVEN a policy break whose sha is listed in the ignored matches of the config
        WHEN computing the ignore reason
        THEN it's not None
        """
        policy_break = PolicyBreakFactory()
        ignored_match = IgnoredMatch(name="x", match=get_ignore_sha(policy_break))
        config = SecretConfig(ignored_matches=[ignored_match])
        reason = compute_ignore_reason(policy_break, config)
        assert reason is not None

    def test_ignore_ignored_detector(self):
        """
        GIVEN a policy break whose break type is listed in the ignored detectors of the config
        WHEN computing the ignore reason
        THEN it's not None
        """
        policy_break = PolicyBreakFactory()
        config = SecretConfig(ignored_detectors=[policy_break.break_type])
        reason = compute_ignore_reason(policy_break, config)
        assert reason is not None

    @pytest.mark.parametrize("ignore_known", (True, False))
    def test_known_secret(self, ignore_known):
        """
        GIVEN a known policy break
        WHEN computing the ignore reason
        THEN it's not None iff ignore_known_secrets is enabled in config
        """
        known_break = PolicyBreakFactory(known_secret=True)
        config = SecretConfig(ignore_known_secrets=ignore_known)
        has_reason = compute_ignore_reason(known_break, config) is not None
        assert has_reason is ignore_known

0 comments on commit f9bb794

Please sign in to comment.