Skip to content

Commit

Permalink
chore: cleanup ignore filters
Browse files Browse the repository at this point in the history
  • Loading branch information
gg-mmill committed Nov 22, 2024
1 parent 0336b28 commit 58e2ed4
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 71 deletions.
54 changes: 2 additions & 52 deletions ggshield/core/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
import math
import operator
import re
from typing import Dict, Iterable, List, Optional, Pattern, Set
from typing import Dict, Iterable, List, Pattern, Set

from click import UsageError
from pygitguardian.models import Match, PolicyBreak, ScanResult
from pygitguardian.models import Match, PolicyBreak

from ggshield.core.types import IgnoredMatch

Expand Down Expand Up @@ -47,42 +47,6 @@ def is_in_ignored_matches(
return False


def remove_ignored_from_result(
    scan_result: ScanResult, matches_ignore: Iterable[IgnoredMatch]
) -> None:
    """
    Filter a scan result in place, dropping every policy break that
    is_in_ignored_matches reports as ignored.

    Both scan_result.policy_breaks and scan_result.policy_break_count are
    overwritten; nothing is returned.

    :param scan_result: ScanResult to filter
    :param matches_ignore: match SHAs or plaintext matches to filter out
    """
    kept = []
    for candidate in scan_result.policy_breaks:
        if is_in_ignored_matches(candidate, matches_ignore):
            continue
        kept.append(candidate)

    scan_result.policy_breaks = kept
    # Keep the stored count in sync with the filtered list.
    scan_result.policy_break_count = len(scan_result.policy_breaks)


def remove_results_from_ignore_detectors(
    scan_result: ScanResult,
    ignored_detectors: Optional[Set[str]] = None,
) -> None:
    """
    Filter a scan result in place, dropping every policy break whose
    break_type is listed in ignored_detectors.

    When ignored_detectors is None or empty the scan result is left
    untouched (policy_break_count is not recomputed either).

    :param scan_result: ScanResult to filter
    :param ignored_detectors: break_type values to filter out
    """
    if ignored_detectors:
        remaining = []
        for candidate in scan_result.policy_breaks:
            if candidate.break_type not in ignored_detectors:
                remaining.append(candidate)

        scan_result.policy_breaks = remaining
        # Keep the stored count in sync with the filtered list.
        scan_result.policy_break_count = len(scan_result.policy_breaks)


def get_ignore_sha(policy_break: PolicyBreak) -> str:
hashable = "".join(
[
Expand Down Expand Up @@ -178,17 +142,3 @@ def censor_string(text: str) -> str:

def censor_match(match: Match) -> str:
    """
    Return the censored form of a match's text.

    :param match: Match whose `match` attribute is passed to censor_string
    :return: whatever censor_string returns for that text
    """
    matched_text = match.match
    return censor_string(matched_text)


def remove_known_secrets_from_result(scan_result: ScanResult):
    """
    Remove secrets that are known by the dashboard
    Only used if ignore_known_secret is set to True

    The scan result is filtered in place: policy breaks with a truthy
    known_secret attribute are dropped and policy_break_count is updated
    to the new length.
    """
    unknown_only = []
    for candidate in scan_result.policy_breaks:
        if candidate.known_secret:
            continue
        unknown_only.append(candidate)

    scan_result.policy_breaks = unknown_only
    # Keep the stored count in sync with the filtered list.
    scan_result.policy_break_count = len(scan_result.policy_breaks)
41 changes: 22 additions & 19 deletions tests/unit/core/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,17 @@
from pygitguardian.models import Match, PolicyBreak, ScanResult
from snapshottest import Snapshot

from ggshield.core.filter import (
censor_match,
get_ignore_sha,
remove_ignored_from_result,
)
from ggshield.core.filter import censor_match, get_ignore_sha, is_in_ignored_matches
from ggshield.core.scan.scannable import StringScannable
from ggshield.core.types import IgnoredMatch
from ggshield.verticals.secret.secret_scan_collection import Result
from tests.unit.conftest import (
_MULTILINE_SECRET,
_MULTIPLE_SECRETS_SCAN_RESULT,
_ONE_LINE_AND_MULTILINE_PATCH_CONTENT,
_ONE_LINE_AND_MULTILINE_PATCH_SCAN_RESULT,
_SIMPLE_SECRET_PATCH,
_SIMPLE_SECRET_PATCH_SCAN_RESULT,
_SIMPLE_SECRET_WITH_FILENAME_PATCH_SCAN_RESULT,
)


Expand Down Expand Up @@ -90,39 +89,38 @@ def test_get_ignore_sha(


@pytest.mark.parametrize(
"scan_result, ignores, final_len",
("content", "scan_result", "ignores", "final_len"),
[
pytest.param(
_SIMPLE_SECRET_PATCH,
_SIMPLE_SECRET_PATCH_SCAN_RESULT,
[],
_SIMPLE_SECRET_PATCH_SCAN_RESULT.policy_break_count,
id="_SIMPLE_SECRET_PATCH_SCAN_RESULT-no remove, not all policies",
),
pytest.param(
_SIMPLE_SECRET_WITH_FILENAME_PATCH_SCAN_RESULT,
[],
_SIMPLE_SECRET_WITH_FILENAME_PATCH_SCAN_RESULT.policy_break_count - 1,
id="_SIMPLE_SECRET_PATCH_WITH_FILENAME_SCAN_RESULT-not all policies",
),
pytest.param(
_SIMPLE_SECRET_PATCH,
_SIMPLE_SECRET_PATCH_SCAN_RESULT,
["2b5840babacb6f089ddcce1fe5a56b803f8b1f636c6f44cdbf14b0c77a194c93"],
0,
id="_SIMPLE_SECRET_PATCH_SCAN_RESULT-remove by sha",
),
pytest.param(
_SIMPLE_SECRET_PATCH,
_SIMPLE_SECRET_PATCH_SCAN_RESULT,
["368ac3edf9e850d1c0ff9d6c526496f8237ddf91"],
0,
id="_SIMPLE_SECRET_PATCH_SCAN_RESULT-remove by plaintext",
),
pytest.param(
_ONE_LINE_AND_MULTILINE_PATCH_CONTENT,
_ONE_LINE_AND_MULTILINE_PATCH_SCAN_RESULT,
["1945f4a0c42abb19c1a420ddd09b4b4681249a3057c427b95f794b18595e7ffa"],
2,
id="_MULTI_SECRET_ONE_LINE_PATCH_SCAN_RESULT-remove one by sha",
),
pytest.param(
_ONE_LINE_AND_MULTILINE_PATCH_CONTENT,
_ONE_LINE_AND_MULTILINE_PATCH_SCAN_RESULT,
[
"060bf63de122848f5efa122fe6cea504aae3b24cea393d887fdefa1529c6a02e",
Expand All @@ -134,14 +132,19 @@ def test_get_ignore_sha(
],
)
def test_remove_ignores(
scan_result: ScanResult, ignores: Iterable, final_len: int
content: str, scan_result: ScanResult, ignores: Iterable, final_len: int
) -> None:
copy_result = copy.deepcopy(scan_result)
ignored_matches = [IgnoredMatch(name="", match=x) for x in ignores]
remove_ignored_from_result(copy_result, ignored_matches)
result = Result(
file=StringScannable(url="localhost", content=content),
scan=copy.deepcopy(scan_result),
)

assert len(copy_result.policy_breaks) == final_len
assert copy_result.policy_break_count == final_len
ignored_matches = [IgnoredMatch(name="", match=x) for x in ignores]
result.apply_ignore_function(
"ignored_matches",
lambda policy_break: is_in_ignored_matches(policy_break, ignored_matches),
)
assert len(result.policy_breaks) == final_len


@pytest.mark.parametrize(
Expand Down

0 comments on commit 58e2ed4

Please sign in to comment.