Skip to content
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ repos:
rev: v3.2.0
hooks:
- id: trailing-whitespace
args: [--markdown-linebreak-ext=md]
- id: end-of-file-fixer
exclude: ^tests/auxiliary/.*
- id: check-yaml
Expand Down
18 changes: 12 additions & 6 deletions cdxev/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,12 +448,16 @@ def create_validation_parser(
parser.add_argument(
"--filename-pattern",
help=(
"Regex for validation of file name. "
"If no Regex is given the default 'name_version_hash_timestamp.cdx.json' "
"or 'bom.json' is used, "
"where name, version and timestamp are mandatory and taken from metadata. "
"Hash is optional as this is not a required information"
"Regex for validation of filename. If not specified, a default regex depending on "
"the schema-type is applied. To disable filename validation altogether, use "
"--no-filename-validation."
),
default="",
)
parser.add_argument(
"--no-filename-validation",
help="Disable filename validation",
action="store_true",
)
parser.add_argument(
"--schema-path",
Expand Down Expand Up @@ -804,7 +808,9 @@ def invoke_validate(args: argparse.Namespace) -> int:
report_format=report_format,
output=output,
schema_type=args.schema_type,
filename_regex=args.filename_pattern,
filename_regex=(
None if args.no_filename_validation else args.filename_pattern
),
schema_path=args.schema_path,
)
== _STATUS_OK
Expand Down
21 changes: 21 additions & 0 deletions cdxev/validator/helper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import json
import re
import typing as t
from importlib import resources
from pathlib import Path

from cdxev.auxiliary.filename_gen import generate_validation_pattern
from cdxev.error import AppError


Expand Down Expand Up @@ -74,3 +77,21 @@ def get_external_schema(schema_path: Path) -> tuple[dict, Path]:
"Could not load schema",
("Path to the provided schema does not exist"),
)


def validate_filename(
    filename: str,
    regex: str,
    sbom: dict,
    schema_type: str,
) -> t.Union[t.Literal[False], str]:
    """
    Check the name of an SBOM file against a regular expression.

    If ``regex`` is empty, a default pattern is selected from ``schema_type``:
    for ``"default"`` the names ``bom.json`` and ``<anything>.cdx.json`` are
    accepted; for every other schema type the pattern is obtained from
    ``generate_validation_pattern(sbom)``.

    The pattern has to match the complete filename (``re.fullmatch``), so the
    anchors ``^`` and ``$`` are optional in a caller-supplied ``regex``.

    :param filename: Name of the file (without directory part) to check.
    :param regex: Pattern to match against; an empty string selects a default.
    :param sbom: The SBOM, only used to derive the non-default pattern.
    :param schema_type: Schema type that decides which default pattern applies.
    :return: ``False`` if the filename is accepted, otherwise a message
        describing the problem.
    """
    if not regex:
        if schema_type == "default":
            regex = r"^(bom\.json|.+\.cdx\.json)$"
        else:
            regex = generate_validation_pattern(sbom)

    try:
        match = re.fullmatch(regex, filename)
    except re.error as err:
        # A caller-supplied pattern may not compile. Report that as a
        # validation problem instead of letting the traceback escape.
        return (
            f"filename pattern {regex!r} is not a valid regular expression: {err}"
        )

    if match is None:
        return "filename doesn't match regular expression " + regex
    return False
20 changes: 11 additions & 9 deletions cdxev/validator/validate.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import logging
import re
import typing as t
from pathlib import Path

from jsonschema import Draft7Validator, FormatChecker
from referencing import Registry, Resource
from referencing.jsonschema import DRAFT202012

from cdxev.auxiliary.filename_gen import generate_validation_pattern
from cdxev.log import LogMessage
from cdxev.validator.customreports import GitLabCQReporter, WarningsNgReporter
from cdxev.validator.helper import load_spdx_schema, open_schema
from cdxev.validator.helper import load_spdx_schema, open_schema, validate_filename

logger = logging.getLogger(__name__)

Expand All @@ -21,7 +21,7 @@ def validate_sbom(
report_format: str,
output: Path,
schema_type: str = "default",
filename_regex: str = "",
filename_regex: t.Optional[str] = "",
schema_path: str = "",
) -> int:
errors = []
Expand All @@ -30,13 +30,15 @@ def validate_sbom(
sbom, file, schema_type, schema_path
)

if not filename_regex:
filename_regex = generate_validation_pattern(sbom)
if re.fullmatch(filename_regex, file.name) is None:
errors.append(
f"SBOM has the mistake: \
filename doesn't match regular expression {filename_regex}"
if filename_regex is not None:
filename_error = validate_filename(
file.name, filename_regex, sbom, schema_type
)
if filename_error:
if filename_regex == "" and schema_type == "default":
logger.warning(filename_error)
else:
errors.append("SBOM has the mistake: " + filename_error)

schema = Resource(
sbom_schema, specification=DRAFT202012
Expand Down
23 changes: 10 additions & 13 deletions docs/available_commands.md
Original file line number Diff line number Diff line change
Expand Up @@ -180,23 +180,20 @@ With the `--schema-path` flag, users can supply their own schema to the validato

cdx-ev validate bom.json --schema-path=C:\users\documents\sbom_schemas\example_schema.json # uses a schema "example_schema.json" saved on the users computer to verify the sbom

### Validation of file name
### Validation of filename

According to the [CycloneDX specification](https://cyclonedx.org/specification/overview/#recognized-file-patterns) there are commonly recognized file name patterns: `bom.json` and `*.cdx.json`.
The tool, by default, also validates the filename of the SBOM. Which filenames are accepted depends on several command-line options:

For unification this tool also validates the file name. Per default the following "regex" is validated:
* `--no-filename-validation` completely disables validation.
* Use `--filename-pattern` to provide a custom regex. The filename must match the regex in full, so the anchors `^` and `$` are not required. Regex patterns often include special characters, so pay attention to the escaping rules of your shell to ensure proper results.
* In all other cases, the acceptable filenames depend on the `--schema-type` option:
* Using the `default` schema (i.e., vanilla CycloneDX), the validator accepts the two patterns recommended by the [CycloneDX specification](https://cyclonedx.org/specification/overview/#recognized-file-patterns): `bom.json` or `*.cdx.json`.
* Using the `custom` schema, filenames must match one of these patterns: `bom.json` or `<name>_<version>_(<hash>|<timestamp>|<hash>_<timestamp>).cdx.json`. Read on for some clarifications.

^name_version_(hash|timestamp|hash_timestamp).cdx.json$ | ^bom.json$
`<name>` and `<version>` correspond to the respective fields in `metadata.component` in the SBOM.

Where name, version, hash and timestamp are information of the `metadata` from a SBOM.

If it is desired to have a different Regex, this can be done via the flag `--filename-pattern`, i.e.:

cdx-ev validate mybom.json --filename-pattern=".*" # every character allowed
cdx-ev validate mybom.json --filename-pattern="(^bom\.json$)" # only bom.json allowed

Please note the use of quotation marks around the value of `--filename-pattern`. They are required to escape special characters.
Otherwise, you may get undesired results because your input is not sanitized.
`<timestamp>` corresponds to `metadata.timestamp` and `<hash>` means any value in `metadata.component.hashes[].content`.
At least one of *timestamp* and *hash* must be present. If both are specified, *hash* must come first.

### Logging

Expand Down
75 changes: 49 additions & 26 deletions tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
import os
import unittest
from pathlib import Path
from unittest import mock
from unittest.mock import Mock, patch

from cdxev.error import AppError
from cdxev.validator.helper import validate_filename
from cdxev.validator.validate import validate_sbom

path_to_folder_with_test_sboms = "tests/auxiliary/test_validate_sboms/"
Expand All @@ -30,10 +31,10 @@ def search_for_word_issues(word: str, issue_list: list) -> bool:
return is_valid


@mock.patch("cdxev.validator.validate.logger")
@patch("cdxev.validator.validate.logger")
def validate_test(
sbom: dict,
mock_logger: unittest.mock.Mock,
mock_logger: Mock,
report_format: str = "stdout",
filename_regex: str = "",
schema_type: str = "custom",
Expand Down Expand Up @@ -63,29 +64,6 @@ def get_test_sbom(path_bom: str = path_to_sbom) -> dict:


class TestValidateInit(unittest.TestCase):
def test_filename_regex(self) -> None:
filename_regex = ".*"
sbom = get_test_sbom()
issues = validate_test(sbom, filename_regex=filename_regex)
self.assertEqual(issues, ["no issue"])

def test_wrong_filename(self) -> None:
filename_regex = "(myfancybom.json)"
sbom = get_test_sbom()
issues = validate_test(sbom, filename_regex=filename_regex)
self.assertTrue(search_for_word_issues("filename doesn't match", issues))

def test_right_hash_filename(self) -> None:
sbom = get_test_sbom()
issues = validate_test(sbom)
self.assertEqual(issues, ["no issue"])

def test_wrong_hash_filename(self) -> None:
sbom = get_test_sbom()
sbom["metadata"]["component"]["hashes"][0]["content"] = "1337"
issues = validate_test(sbom)
self.assertTrue(search_for_word_issues("filename doesn't match", issues))

@unittest.skipUnless("CI" in os.environ, "running only in CI")
def test_custom_schema(self) -> None:
sbom = get_test_sbom()
Expand Down Expand Up @@ -1111,3 +1089,48 @@ def test_internal_component_metadata_copyright_festo_no_supplier(self) -> None:
sbom["specVersion"] = spec_version
issues = validate_test(sbom)
self.assertEqual(search_for_word_issues("supplier", issues), True)


class TestValidateFilename(unittest.TestCase):
    """Tests for ``validate_filename`` with default, custom and explicit patterns."""

    def setUp(self) -> None:
        self.sbom = get_test_sbom()

    def test_valid_with_default_schema(self) -> None:
        """The default schema accepts ``bom.json`` and ``*.cdx.json``."""
        for filename in ["bom.json", "random.cdx.json", "-.cdx.json"]:
            with self.subTest(filename=filename):
                result = validate_filename(filename, "", self.sbom, "default")
                self.assertFalse(result)

    def test_invalid_with_default_schema(self) -> None:
        """The default schema reports every other filename as an error string."""
        for filename in ["bomjson", "bom.jso", "random.bom.json", ".cdx.json"]:
            with self.subTest(filename=filename):
                result = validate_filename(filename, "", self.sbom, "default")
                self.assertIsInstance(result, str)

    def test_valid_with_custom_schema(self) -> None:
        """The custom schema accepts ``bom.json`` and names built from the metadata."""
        for filename in [
            "bom.json",
            "Acme_Application_9.1.1_20220217T101458.cdx.json",
            "Acme_Application_9.1.1_ec7781220ec7781220ec778122012345.cdx.json",
            "Acme_Application_9.1.1_ec7781220ec7781220ec778122012345_20220217T101458.cdx.json",
        ]:
            with self.subTest(filename=filename):
                result = validate_filename(filename, "", self.sbom, "custom")
                self.assertFalse(result)

    def test_invalid_with_custom_schema(self) -> None:
        """The custom schema rejects names with missing or wrong metadata parts."""
        for filename in [
            "bomjson",
            "bom.jso",
            "random.bom.json",
            ".cdx.json",
            "Acme_Application_20220217T101458.cdx.json",
            "unknown_9.1.1_ec7781220ec7781220ec778122012345.cdx.json",
            "Acme_Application_9.1.1.cdx.json",
            "Acme_Application.cdx.json",
            "Acme_Application_9.1.1_20220217T101458.json",
            "Acme_Application_9.1.1_20220217T101458.cdx",
        ]:
            with self.subTest(filename=filename):
                result = validate_filename(filename, "", self.sbom, "custom")
                self.assertIsInstance(result, str)

    def test_explicit_regex_takes_precedence(self) -> None:
        """A non-empty regex replaces the schema-dependent default pattern."""
        for schema_type in ["default", "custom"]:
            with self.subTest(schema_type=schema_type):
                # "mybom.json" fails both defaults but matches the explicit ".*".
                self.assertFalse(
                    validate_filename("mybom.json", ".*", self.sbom, schema_type)
                )
                # "bom.json" passes both defaults but not the explicit pattern.
                self.assertIsInstance(
                    validate_filename(
                        "bom.json", "myfancybom\\.json", self.sbom, schema_type
                    ),
                    str,
                )