diff --git a/ggshield/cmd/secret/scan/secret_scan_common_options.py b/ggshield/cmd/secret/scan/secret_scan_common_options.py index cd27fb9cda..ed3dfbb3fc 100644 --- a/ggshield/cmd/secret/scan/secret_scan_common_options.py +++ b/ggshield/cmd/secret/scan/secret_scan_common_options.py @@ -140,6 +140,14 @@ def _banlist_detectors_callback( ) +_all_secrets = click.option( + "--all-secrets", + is_flag=True, + help=("Display all secrets, as well as their possible ignore reason."), + callback=create_config_callback("secret", "all_secrets"), +) + + def add_secret_scan_common_options() -> Callable[[AnyFunction], AnyFunction]: def decorator(cmd: AnyFunction) -> AnyFunction: add_common_options()(cmd) @@ -153,6 +161,7 @@ def decorator(cmd: AnyFunction) -> AnyFunction: _banlist_detectors_option(cmd) _with_incident_details_option(cmd) instance_option(cmd) + _all_secrets(cmd) return cmd return decorator diff --git a/ggshield/core/config/user_config.py b/ggshield/core/config/user_config.py index bbae28bde5..f4dcc955eb 100644 --- a/ggshield/core/config/user_config.py +++ b/ggshield/core/config/user_config.py @@ -47,6 +47,7 @@ class SecretConfig(FilteredConfig): ignore_known_secrets: bool = False with_incident_details: bool = False # if configuration key is left unset the dashboard's remediation message is used. + all_secrets: bool = False prereceive_remediation_message: str = "" def add_ignored_match(self, secret: IgnoredMatch) -> None: diff --git a/ggshield/verticals/secret/secret_scan_collection.py b/ggshield/verticals/secret/secret_scan_collection.py index 3220cc56c0..77bdafc059 100644 --- a/ggshield/verticals/secret/secret_scan_collection.py +++ b/ggshield/verticals/secret/secret_scan_collection.py @@ -1,21 +1,10 @@ from dataclasses import dataclass, field from enum import Enum from pathlib import Path -from typing import ( - Any, - Callable, - Dict, - Iterable, - List, - NamedTuple, - Optional, - Tuple, - Union, - cast, -) +from typing import Any, Dict, Iterable, List, NamedTuple, Optional, Tuple, Union, cast from pygitguardian import GGClient -from pygitguardian.models import Detail, Match, PolicyBreak, ScanResult, SecretIncident +from pygitguardian.models import Detail, Match, PolicyBreak, SecretIncident from ggshield.core.errors import UnexpectedError, handle_api_error from ggshield.core.lines import Line, get_lines_from_content @@ -24,7 +13,7 @@ from ggshield.verticals.secret.extended_match import ExtendedMatch -class IgnoreReason(Enum): +class IgnoreReason(str, Enum): IGNORED_MATCH = "ignored_match" IGNORED_DETECTOR = "ignored_detector" KNOWN_SECRET = "known_secret" @@ -42,14 +31,14 @@ class Result: path: Path url: str policy_breaks: List[PolicyBreak] - ignored_policy_breaks_count_by_reason: Dict[IgnoreReason, int] + ignored_policy_breaks_count_by_reason: Dict[str, int] - def __init__(self, file: Scannable, scan: ScanResult): + def __init__(self, file: Scannable, policy_breaks: List[PolicyBreak]): self.filename = file.filename self.filemode = file.filemode self.path = file.path self.url = file.url - self.policy_breaks = scan.policy_breaks + self.policy_breaks = policy_breaks lines = get_lines_from_content(file.content, self.filemode) self.enrich_matches(lines) self.ignored_policy_breaks_count_by_reason = {} @@ -90,21 +79,6 @@ def censor(self) -> None: def has_policy_breaks(self) -> bool: return len(self.policy_breaks) > 0 - def apply_ignore_function( - self, reason: IgnoreReason, ignore_function: Callable[[PolicyBreak], bool] - ): - if reason in self.ignored_policy_breaks_count_by_reason: - raise Exception(f"Ignore was already computed for {IgnoreReason}") - to_keep = [] - ignored_count = 0 - for policy_break in self.policy_breaks: - if ignore_function(policy_break): - ignored_count += 1 - else: - to_keep.append(policy_break) - self.policy_breaks = to_keep - self.ignored_policy_breaks_count_by_reason[reason] = ignored_count - class Error(NamedTuple): files: List[Tuple[str, Filemode]] diff --git a/ggshield/verticals/secret/secret_scanner.py b/ggshield/verticals/secret/secret_scanner.py index ecc045529c..b0bbccd877 100644 --- a/ggshield/verticals/secret/secret_scanner.py +++ b/ggshield/verticals/secret/secret_scanner.py @@ -3,11 +3,19 @@ import os import sys from ast import literal_eval +from collections import defaultdict from concurrent.futures import Future from typing import Dict, Iterable, List, Optional, Union from pygitguardian import GGClient -from pygitguardian.models import APITokensResponse, Detail, MultiScanResult, TokenScope +from pygitguardian.models import ( + APITokensResponse, + Detail, + MultiScanResult, + PolicyBreak, + ScanResult, + TokenScope, +) from ggshield.core import ui from ggshield.core.cache import Cache @@ -217,32 +225,7 @@ def _collect_results( assert isinstance(scan, MultiScanResult) for file, scan_result in zip(chunk, scan.scan_results): - result = Result( - file=file, - scan=scan_result, - ) - if not scan_result.has_policy_breaks: - continue - result.apply_ignore_function( - IgnoreReason.IGNORED_MATCH, - lambda policy_break: is_in_ignored_matches( - policy_break, self.ignored_matches - ), - ) - result.apply_ignore_function( - IgnoreReason.IGNORED_DETECTOR, - lambda policy_break: policy_break.break_type - in self.ignored_detectors, - ) - result.apply_ignore_function( - IgnoreReason.BACKEND_EXCLUDED, - lambda policy_break: policy_break.is_excluded, - ) - if self.secret_config.ignore_known_secrets: - result.apply_ignore_function( - IgnoreReason.KNOWN_SECRET, - lambda policy_break: policy_break.known_secret, - ) + result = self.create_result(file, scan_result) for policy_break in result.policy_breaks: self.cache.add_found_policy_break(policy_break, file.filename) results.append(result) @@ -250,6 +233,40 @@ def _collect_results( self.cache.save() return Results(results=results, errors=errors) + def compute_ignore(self, policy_break: PolicyBreak) -> str | None: + ignore_reason = None + if policy_break.is_excluded: + ignore_reason = f"Excluded from backend ({policy_break.exclude_reason})" + elif is_in_ignored_matches(policy_break, self.ignored_matches or []): + ignore_reason = IgnoreReason.IGNORED_MATCH + elif policy_break.break_type in self.ignored_detectors: + ignore_reason = IgnoreReason.IGNORED_DETECTOR + elif self.secret_config.ignore_known_secrets and policy_break.known_secret: + ignore_reason = IgnoreReason.KNOWN_SECRET + + return ignore_reason + + def create_result(self, file: Scannable, scan_result: ScanResult) -> Result: + to_keep = [] + ignored_policy_breaks_count_by_reason = defaultdict(lambda: 0) + for policy_break in scan_result.policy_breaks: + ignore_reason = self.compute_ignore(policy_break) + if ignore_reason is not None: + if self.secret_config.all_secrets: + policy_break.exclude_reason = ignore_reason + policy_break.is_excluded = True + to_keep.append(policy_break) + else: + ignored_policy_breaks_count_by_reason[ignore_reason] += 1 + else: + to_keep.append(policy_break) + + result = Result(file, to_keep) + result.ignored_policy_breaks_count_by_reason = ( + ignored_policy_breaks_count_by_reason + ) + return result + def handle_scan_chunk_error(detail: Detail, chunk: List[Scannable]) -> None: handle_api_error(detail) diff --git a/tests/unit/verticals/secret/output/test_json_output.py b/tests/unit/verticals/secret/output/test_json_output.py index 26a1041f75..e5c0af67a6 100644 --- a/tests/unit/verticals/secret/output/test_json_output.py +++ b/tests/unit/verticals/secret/output/test_json_output.py @@ -444,7 +444,7 @@ def test_ignore_known_secrets(verbose, ignore_known_secrets, secrets_types): url="leak.txt", filemode=Filemode.NEW, ), - scan=deepcopy(TWO_POLICY_BREAKS), # 2 policy breaks + policy_breaks=deepcopy(TWO_POLICY_BREAKS.policy_breaks), # 2 policy breaks ) all_policy_breaks = result.policy_breaks @@ -537,7 +537,7 @@ def test_with_incident_details( url="leak.txt", filemode=Filemode.NEW, ), - scan=deepcopy(TWO_POLICY_BREAKS), # 2 policy breaks + policy_breaks=deepcopy(TWO_POLICY_BREAKS.policy_breaks), # 2 policy breaks ) all_policy_breaks = result.policy_breaks diff --git a/tests/unit/verticals/secret/output/test_sarif_output.py b/tests/unit/verticals/secret/output/test_sarif_output.py index fb7e404e93..4c8fd046e2 100644 --- a/tests/unit/verticals/secret/output/test_sarif_output.py +++ b/tests/unit/verticals/secret/output/test_sarif_output.py @@ -192,7 +192,7 @@ def test_sarif_output_for_flat_scan_with_secrets( policy_break.known_secret = False policy_break.incident_url = None - result = Result(file=scannable, scan=scan_result) + result = Result(file=scannable, policy_breaks=scan_result.policy_breaks) results = Results(results=[result]) scan = SecretScanCollection(id="path", type="test", results=results) @@ -248,7 +248,7 @@ def test_sarif_output_for_nested_scan(init_secrets_engine_version): scannable = next(commit.get_files()) contents.append(scannable.content) - result = Result(file=scannable, scan=scan_result) + result = Result(file=scannable, policy_breaks=scan_result.policy_breaks) results = Results(results=[result]) scan = SecretScanCollection(id=f"nested{idx}", type="test", results=results) nested_scans.append(scan) diff --git a/tests/unit/verticals/secret/output/test_text_output.py b/tests/unit/verticals/secret/output/test_text_output.py index 07b322ac58..c5692d558d 100644 --- a/tests/unit/verticals/secret/output/test_text_output.py +++ b/tests/unit/verticals/secret/output/test_text_output.py @@ -47,7 +47,7 @@ url="leak.txt", filemode=Filemode.NEW, ), - scan=_SIMPLE_SECRET_PATCH_SCAN_RESULT, + policy_breaks=_SIMPLE_SECRET_PATCH_SCAN_RESULT.policy_breaks, ), id="_SIMPLE_SECRET_PATCH_SCAN_RESULT", ), @@ -58,7 +58,7 @@ url="leak.txt", filemode=Filemode.NEW, ), - scan=_MULTI_SECRET_ONE_LINE_PATCH_SCAN_RESULT, + policy_breaks=_MULTI_SECRET_ONE_LINE_PATCH_SCAN_RESULT.policy_breaks, ), id="_MULTI_SECRET_ONE_LINE_PATCH_SCAN_RESULT", ), @@ -69,7 +69,7 @@ url="leak.txt", filemode=Filemode.NEW, ), - scan=_MULTI_SECRET_ONE_LINE_PATCH_OVERLAY_SCAN_RESULT, + policy_breaks=_MULTI_SECRET_ONE_LINE_PATCH_OVERLAY_SCAN_RESULT.policy_breaks, ), id="_MULTI_SECRET_ONE_LINE_PATCH_OVERLAY_SCAN_RESULT", ), @@ -80,7 +80,7 @@ url="leak.txt", filemode=Filemode.NEW, ), - scan=_MULTI_SECRET_TWO_LINES_PATCH_SCAN_RESULT, + policy_breaks=_MULTI_SECRET_TWO_LINES_PATCH_SCAN_RESULT.policy_breaks, ), id="_MULTI_SECRET_TWO_LINES_PATCH_SCAN_RESULT", ), @@ -91,7 +91,7 @@ url="leak.txt", filemode=Filemode.NEW, ), - scan=_SIMPLE_SECRET_MULTILINE_PATCH_SCAN_RESULT, + policy_breaks=_SIMPLE_SECRET_MULTILINE_PATCH_SCAN_RESULT.policy_breaks, ), id="_SIMPLE_SECRET_MULTILINE_PATCH_SCAN_RESULT", ), @@ -102,7 +102,7 @@ url="leak.txt", filemode=Filemode.NEW, ), - scan=_ONE_LINE_AND_MULTILINE_PATCH_SCAN_RESULT, + policy_breaks=_ONE_LINE_AND_MULTILINE_PATCH_SCAN_RESULT.policy_breaks, ), id="_ONE_LINE_AND_MULTILINE_PATCH_CONTENT", ), diff --git a/tests/unit/verticals/secret/test_scan_repo.py b/tests/unit/verticals/secret/test_scan_repo.py index 9f94be9a11..d7ffd9a64e 100644 --- a/tests/unit/verticals/secret/test_scan_repo.py +++ b/tests/unit/verticals/secret/test_scan_repo.py @@ -123,11 +123,11 @@ def test_scan_2_commits_same_content(secret_scanner_mock): results=[ Result( commit_1_files[0], - scan=deepcopy(TWO_POLICY_BREAKS), + policy_breaks=deepcopy(TWO_POLICY_BREAKS.policy_breaks), ), Result( commit_2_files[0], - scan=deepcopy(TWO_POLICY_BREAKS), + policy_breaks=deepcopy(TWO_POLICY_BREAKS.policy_breaks), ), ], errors=[], @@ -208,15 +208,15 @@ def test_scan_2_commits_file_association(secret_scanner_mock): results=[ Result( file1_3, - scan=policy_breaks_file_1_3, + policy_breaks=policy_breaks_file_1_3.policy_breaks, ), Result( file2_1, - scan=policy_breaks_file_2_1, + policy_breaks=policy_breaks_file_2_1.policy_breaks, ), Result( file1_1, - scan=policy_breaks_file_1_1, + policy_breaks=policy_breaks_file_1_1.policy_breaks, ), ], errors=[],