diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 76a927fc..86859846 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,6 +7,7 @@ repos: rev: v3.2.0 hooks: - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] - id: end-of-file-fixer exclude: ^tests/auxiliary/.* - id: check-yaml diff --git a/cdxev/__main__.py b/cdxev/__main__.py index 91fedec7..baca0fe5 100644 --- a/cdxev/__main__.py +++ b/cdxev/__main__.py @@ -448,12 +448,16 @@ def create_validation_parser( parser.add_argument( "--filename-pattern", help=( - "Regex for validation of file name. " - "If no Regex is given the default 'name_version_hash_timestamp.cdx.json' " - "or 'bom.json' is used, " - "where name, version and timestamp are mandatory and taken from metadata. " - "Hash is optional as this is not a required information" + "Regex for validation of filename. If not specified, a default regex depending on " + "the schema-type is applied. To disable filename validation altogether, use " + "--no-filename-validation." 
), + default="", + ) + parser.add_argument( + "--no-filename-validation", + help="Disable filename validation", + action="store_true", ) parser.add_argument( "--schema-path", @@ -804,7 +808,9 @@ def invoke_validate(args: argparse.Namespace) -> int: report_format=report_format, output=output, schema_type=args.schema_type, - filename_regex=args.filename_pattern, + filename_regex=( + None if args.no_filename_validation else args.filename_pattern + ), schema_path=args.schema_path, ) == _STATUS_OK diff --git a/cdxev/validator/helper.py b/cdxev/validator/helper.py index 46c0281e..890811b9 100644 --- a/cdxev/validator/helper.py +++ b/cdxev/validator/helper.py @@ -1,7 +1,10 @@ import json +import re +import typing as t from importlib import resources from pathlib import Path +from cdxev.auxiliary.filename_gen import generate_validation_pattern from cdxev.error import AppError @@ -74,3 +77,21 @@ def get_external_schema(schema_path: Path) -> tuple[dict, Path]: "Could not load schema", ("Path to the provided schema does not exist"), ) + + +def validate_filename( + filename: str, + regex: str, + sbom: dict, + schema_type: str, +) -> t.Union[t.Literal[False], str]: + if not regex: + if schema_type == "default": + regex = "^(bom\\.json|.+\\.cdx\\.json)$" + else: + regex = generate_validation_pattern(sbom) + + if re.fullmatch(regex, filename) is None: + return "filename doesn't match regular expression " + regex + else: + return False diff --git a/cdxev/validator/validate.py b/cdxev/validator/validate.py index 8cc0152b..cad9e820 100644 --- a/cdxev/validator/validate.py +++ b/cdxev/validator/validate.py @@ -1,15 +1,15 @@ import logging import re +import typing as t from pathlib import Path from jsonschema import Draft7Validator, FormatChecker from referencing import Registry, Resource from referencing.jsonschema import DRAFT202012 -from cdxev.auxiliary.filename_gen import generate_validation_pattern from cdxev.log import LogMessage from cdxev.validator.customreports import 
GitLabCQReporter, WarningsNgReporter -from cdxev.validator.helper import load_spdx_schema, open_schema +from cdxev.validator.helper import load_spdx_schema, open_schema, validate_filename logger = logging.getLogger(__name__) @@ -21,7 +21,7 @@ def validate_sbom( report_format: str, output: Path, schema_type: str = "default", - filename_regex: str = "", + filename_regex: t.Optional[str] = "", schema_path: str = "", ) -> int: errors = [] @@ -30,13 +30,15 @@ def validate_sbom( sbom, file, schema_type, schema_path ) - if not filename_regex: - filename_regex = generate_validation_pattern(sbom) - if re.fullmatch(filename_regex, file.name) is None: - errors.append( - f"SBOM has the mistake: \ - filename doesn't match regular expression {filename_regex}" + if filename_regex is not None: + filename_error = validate_filename( + file.name, filename_regex, sbom, schema_type ) + if filename_error: + if filename_regex == "" and schema_type == "default": + logger.warning(filename_error) + else: + errors.append("SBOM has the mistake: " + filename_error) schema = Resource( sbom_schema, specification=DRAFT202012 diff --git a/docs/available_commands.md b/docs/available_commands.md index dc7afbf5..23166ad4 100644 --- a/docs/available_commands.md +++ b/docs/available_commands.md @@ -180,23 +180,20 @@ With the `--schema-path` flag, users can supply their own schema to the validato cdx-ev validate bom.json --schema-path=C:\users\documents\sbom_schemas\example_schema.json # uses a schema "example_schema.json" saved on the users computer to verify the sbom -### Validation of file name +### Validation of filename -According to the [CycloneDX specification](https://cyclonedx.org/specification/overview/#recognized-file-patterns) there are commonly recognized file name patterns: `bom.json` and `*.cdx.json`. +The tool, by default, also validates the filename of the SBOM. Which filenames are accepted depends on several command-line options: -For unification this tool also validates the file name. 
Per default the following "regex" is validated: +* `--no-filename-validation` completely disables validation. +* Use `--filename-pattern` to provide a custom regex. The filename must be a full match, regex anchors (^ and $) are not required. Regex patterns often include special characters. Pay attention to escaping rules for your shell to ensure proper results. +* In all other cases, the acceptable filenames depend on the `--schema-type` option: + * Using the `default` schema (i.e., vanilla CycloneDX), the validator accepts the two patterns recommended by the [CycloneDX specification](https://cyclonedx.org/specification/overview/#recognized-file-patterns): `bom.json` or `*.cdx.json`. + * Using the `custom` schema, filenames must match one of these patterns: `bom.json` or `<name>_<version>_<hash>|<timestamp>|<hash>_<timestamp>.cdx.json`. Read on for some clarifications. - ^name_version_(hash|timestamp|hash_timestamp).cdx.json$ | ^bom.json$ +`<name>` and `<version>` correspond to the respective fields in `metadata.component` in the SBOM. -Where name, version, hash and timestamp are information of the `metadata` from a SBOM. - -If it is desired to have a different Regex, this can be done via the flag `--filename-pattern`, i.e.: - - cdx-ev validate mybom.json --filename-pattern=".*" # every character allowed - cdx-ev validate mybom.json --filename-pattern="(^bom\.json$)" # only bom.json allowed - -Please note the usage uf quotation marks in the `--filename-pattern`. This is required for the escaping of special characters. -Otherwise, this may lead to undesired results as your input is not sanitized. +`<timestamp>` corresponds to `metadata.timestamp` and `<hash>` means any value in `metadata.component.hashes[].content`. +Either *timestamp* or *hash* must be present. If both are specified, *hash* must come first. 
### Logging diff --git a/tests/test_validate.py b/tests/test_validate.py index 9bb2ba63..eec4fcad 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -2,9 +2,10 @@ import os import unittest from pathlib import Path -from unittest import mock +from unittest.mock import Mock, patch from cdxev.error import AppError +from cdxev.validator.helper import validate_filename from cdxev.validator.validate import validate_sbom path_to_folder_with_test_sboms = "tests/auxiliary/test_validate_sboms/" @@ -30,10 +31,10 @@ def search_for_word_issues(word: str, issue_list: list) -> bool: return is_valid -@mock.patch("cdxev.validator.validate.logger") +@patch("cdxev.validator.validate.logger") def validate_test( sbom: dict, - mock_logger: unittest.mock.Mock, + mock_logger: Mock, report_format: str = "stdout", filename_regex: str = "", schema_type: str = "custom", @@ -63,29 +64,6 @@ def get_test_sbom(path_bom: str = path_to_sbom) -> dict: class TestValidateInit(unittest.TestCase): - def test_filename_regex(self) -> None: - filename_regex = ".*" - sbom = get_test_sbom() - issues = validate_test(sbom, filename_regex=filename_regex) - self.assertEqual(issues, ["no issue"]) - - def test_wrong_filename(self) -> None: - filename_regex = "(myfancybom.json)" - sbom = get_test_sbom() - issues = validate_test(sbom, filename_regex=filename_regex) - self.assertTrue(search_for_word_issues("filename doesn't match", issues)) - - def test_right_hash_filename(self) -> None: - sbom = get_test_sbom() - issues = validate_test(sbom) - self.assertEqual(issues, ["no issue"]) - - def test_wrong_hash_filename(self) -> None: - sbom = get_test_sbom() - sbom["metadata"]["component"]["hashes"][0]["content"] = "1337" - issues = validate_test(sbom) - self.assertTrue(search_for_word_issues("filename doesn't match", issues)) - @unittest.skipUnless("CI" in os.environ, "running only in CI") def test_custom_schema(self) -> None: sbom = get_test_sbom() @@ -1111,3 +1089,48 @@ def 
test_internal_component_metadata_copyright_festo_no_supplier(self) -> None: sbom["specVersion"] = spec_version issues = validate_test(sbom) self.assertEqual(search_for_word_issues("supplier", issues), True) + + +class TestValidateFilename(unittest.TestCase): + def setUp(self) -> None: + self.sbom = get_test_sbom() + + def test_valid_with_default_schema(self): + for filename in ["bom.json", "random.cdx.json", "-.cdx.json"]: + with self.subTest(filename=filename): + result = validate_filename(filename, "", self.sbom, "default") + self.assertFalse(result) + + def test_invalid_with_default_schema(self): + for filename in ["bomjson", "bom.jso", "random.bom.json", ".cdx.json"]: + with self.subTest(filename=filename): + result = validate_filename(filename, "", self.sbom, "default") + self.assertIsInstance(result, str) + + def test_valid_with_custom_schema(self): + for filename in [ + "bom.json", + "Acme_Application_9.1.1_20220217T101458.cdx.json", + "Acme_Application_9.1.1_ec7781220ec7781220ec778122012345.cdx.json", + "Acme_Application_9.1.1_ec7781220ec7781220ec778122012345_20220217T101458.cdx.json", + ]: + with self.subTest(filename=filename): + result = validate_filename(filename, "", self.sbom, "custom") + self.assertFalse(result) + + def test_invalid_with_custom_schema(self): + for filename in [ + "bomjson", + "bom.jso", + "random.bom.json", + ".cdx.json", + "Acme_Application_20220217T101458.cdx.json", + "unknown_9.1.1_ec7781220ec7781220ec778122012345.cdx.json", + "Acme_Application_9.1.1.cdx.json", + "Acme_Application.cdx.json", + "Acme_Application_9.1.1_20220217T101458.json", + "Acme_Application_9.1.1_20220217T101458.cdx", + ]: + with self.subTest(filename=filename): + result = validate_filename(filename, "", self.sbom, "custom") + self.assertIsInstance(result, str)