diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0e9fd4b6..76a927fc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,6 +8,7 @@ repos: hooks: - id: trailing-whitespace - id: end-of-file-fixer + exclude: ^tests/auxiliary/.* - id: check-yaml - repo: https://github.com/psf/black-pre-commit-mirror rev: "24.1.1" diff --git a/NOTICE b/NOTICE new file mode 100644 index 00000000..5b955f60 --- /dev/null +++ b/NOTICE @@ -0,0 +1,6 @@ +CycloneDX Editor Validator +Copyright (c) 2023-2024 Festo SE & Co. KG + +This product includes material developed by third parties: +License name to SPDX id mapping - Copyright (c) OWASP Foundation - - Apache-2.0 +License name to SPDX id mapping - Copyright (c) The Linux Foundation - - CC-BY-3.0 diff --git a/cdxev/__main__.py b/cdxev/__main__.py index 70f58662..91fedec7 100644 --- a/cdxev/__main__.py +++ b/cdxev/__main__.py @@ -1,15 +1,22 @@ import argparse +import inspect import json import logging import os import re +import shutil import sys +import textwrap +from dataclasses import dataclass from pathlib import Path -from typing import List, NoReturn, Optional, Tuple +from typing import TYPE_CHECKING, List, NoReturn, Optional, Tuple +import docstring_parser + +import cdxev.amend.command as amend import cdxev.set from cdxev import pkg -from cdxev.amend.command import run as amend +from cdxev.amend.operations import Operation from cdxev.auxiliary.identity import Key, KeyType from cdxev.auxiliary.output import write_sbom from cdxev.build_public_bom import build_public_bom @@ -172,29 +179,176 @@ def add_output_argument(parser: argparse.ArgumentParser) -> None: ) +@dataclass +class _AmendOperationDetails: + cls: type[Operation] + name: str + short_description: str + long_description: str + options: list[dict] + is_default: bool + + +_upper_case_letters_after_first = re.compile(r"(? _AmendOperationDetails: + """ + Gets details about an amend operation which are required for the argument parser. 
+ + :param cls: The operation class. Must be a subclass of :py:class:`Operation`. + :return: Details about the operation documentation and its options. + """ + if TYPE_CHECKING: + # Shut up mypy. If these assertions don't hold, + # integration tests will break, so no problem at runtime. + assert cls.__doc__ is not None # nosec B101 + assert cls.__init__.__doc__ is not None # nosec B101 + op_name = re.sub(_upper_case_letters_after_first, "-", cls.__name__).lower() + op_doc = docstring_parser.parse(cls.__doc__) + op_short_help = op_doc.short_description + op_long_help = op_doc.long_description + op_is_default = getattr(cls, "_amendDefault", False) + init_sig = inspect.signature(cls.__init__) + init_params = { + name: param + for name, param in init_sig.parameters.items() + if name not in ("self", "args", "kwargs") + } + init_doc = docstring_parser.parse(cls.__init__.__doc__) + + args = [] + for name, param in init_params.items(): + if name == "self": + continue + + param_doc = next(p for p in init_doc.params if p.arg_name == name) + args.append( + { + "dest": name, + "name": "--" + name.replace("_", "-"), + "type": param.annotation, + "default": param.default, + "help": param_doc.description, + } + ) + + return _AmendOperationDetails( + cls=cls, + name=op_name, + short_description=op_short_help or "", + long_description=op_long_help or "", + is_default=op_is_default, + options=args, + ) + + +def reflow_paragraphs(text: str, indent: int = 8) -> str: + """ + Reformats a string comprised of several paragraphs to properly output it to the console. + + This function considers double newlines ('\\n\\n') paragraph breaks and will preserve them. + Any other whitespace, including single newlines will be collapsed. + + The width of the final string is equal to the terminal width but capped at 160 characters. + + :param text: The string to reformat. + :param indent: The number of spaces to add before each line. + :returns: The reformatted string. 
+ """ + max_width = min(shutil.get_terminal_size()[0], 160) + textwrapper = textwrap.TextWrapper( + width=max_width, + initial_indent=" " * indent, + subsequent_indent=" " * indent, + ) + text = textwrap.dedent(text) + paragraphs = [textwrapper.fill(para) for para in text.split("\n\n")] + + return "\n\n".join(paragraphs) + + # noinspection PyUnresolvedReferences,PyProtectedMember def create_amend_parser( subparsers: argparse._SubParsersAction, ) -> argparse.ArgumentParser: + description = ( + "The amend command splits its functionality into several operations.\n" + "You can select which operations run using the --operation option. " + "If you don't, operations marked [default] will run.\n" + "The following operations are available:\n\n" + ) + + operations = amend.get_all_operations() + operation_details = [get_operation_details(op) for op in operations] + operation_details = sorted(operation_details, key=lambda op: op.name) + + operations_by_name: dict[str, _AmendOperationDetails] = {} + argument_groups: dict[str, list[dict]] = {} + default_operations: list[str] = [] + for op in operation_details: + setattr(op.cls, "_details", op) + + # Add operation to map + operations_by_name[op.name] = op + + # Prepare options to add them to the parser later + if op.options: + argument_groups[op.name] = op.options + + # Add operation to help text + if op.is_default: + default_operations.append(op.name) + description += f" {op.name} [default]:\n" + else: + description += f" {op.name}:\n" + + desc = reflow_paragraphs(op.short_description) + description += desc + "\n\n" + parser = subparsers.add_parser( "amend", help="Adds missing auto-generatable information to an existing SBOM", + description=description, + formatter_class=argparse.RawDescriptionHelpFormatter, ) parser.add_argument( "input", metavar="", help="Path to the SBOM file.", type=Path, + default=None, + nargs="?", ) parser.add_argument( - "--license-path", - metavar="", - help="Path to a folder with txt-files containing 
license texts to be copied in the SBOM", - type=str, - default="", + "--operation", + help=( + "Select an operation to run. Can be provided more than once to run multiple " + "operations in one run." + ), + choices=list(operations_by_name.keys()), + metavar="", + default=default_operations, + action="append", ) + parser.add_argument( + "--help-operation", + help="Displays details about an operation and exits afterwards.", + choices=list(operations_by_name.keys()), + metavar="", + ) + + # Add arguments for operation options + for group, args in argument_groups.items(): + group_parser = parser.add_argument_group(f"Options for '{group}'") + for opt in args: + name = opt["name"] + del opt["name"] + group_parser.add_argument(name, **opt) + add_output_argument(parser) + parser.set_defaults(operations_by_name=operations_by_name) parser.set_defaults(cmd_handler=invoke_amend) return parser @@ -465,8 +619,39 @@ def create_build_public_bom_parser( def invoke_amend(args: argparse.Namespace) -> int: + if args.help_operation: + short_desc = args.operations_by_name[args.help_operation].short_description + long_desc = reflow_paragraphs( + args.operations_by_name[args.help_operation].long_description, indent=0 + ) + + print() + print(short_desc) + print("-" * len(short_desc)) + print() + print(long_desc) + print() + + sys.exit() + + if not args.input: + usage_error(" argument missing.") + sbom, _ = read_sbom(args.input) - amend(sbom, args.license_path) + + # Prepare the operation options that were passed on the command-line + config = {} + operations = [] + for op in args.operation: + details = args.operations_by_name[op] + operations.append(details.cls) + op_arguments = {} + for opt in details.options: + dest = opt["dest"] + op_arguments[dest] = getattr(args, dest) + config[op] = op_arguments + + amend.run(sbom, operations, config) write_sbom(sbom, args.output) return _STATUS_OK diff --git a/cdxev/amend/__init__.py b/cdxev/amend/__init__.py index 8b6549e4..e69de29b 100644 --- 
a/cdxev/amend/__init__.py +++ b/cdxev/amend/__init__.py @@ -1,16 +0,0 @@ -from .command import register_operation -from .operations import ( - AddBomRefOperation, - CompositionsOperation, - DefaultAuthorOperation, - InferCopyright, - InferSupplier, - ProcessLicense, -) - -register_operation(AddBomRefOperation()) -register_operation(DefaultAuthorOperation()) -register_operation(CompositionsOperation()) -register_operation(InferSupplier()) -register_operation(ProcessLicense()) -register_operation(InferCopyright()) diff --git a/cdxev/amend/command.py b/cdxev/amend/command.py index 28218bef..13127ef6 100644 --- a/cdxev/amend/command.py +++ b/cdxev/amend/command.py @@ -1,54 +1,69 @@ import logging +import typing as t from cdxev.auxiliary.sbomFunctions import walk_components -from .operations import Operation, ProcessLicense +from .operations import Operation -__operations: list[Operation] = [] logger = logging.getLogger(__name__) -def register_operation(operation: Operation) -> None: - """ - Registers an operation for the amend command. +def get_all_operations() -> list[type[Operation]]: + return Operation.__subclasses__() - This function is typically invoked in __init__.py. - """ - __operations.append(operation) +def create_operations( + operations: list[type[Operation]], config: dict[type[Operation], dict[str, t.Any]] +) -> list["Operation"]: + instances = [] + for op in operations: + options = config.get(op, {}) + instances.append(op(**options)) + + return instances -def run(sbom: dict, path_to_license_folder: str = "") -> None: + +def run( + sbom: dict, + selected: t.Optional[list[type[Operation]]] = None, + config: dict[type[Operation], dict[str, t.Any]] = {}, +) -> None: """ Runs the amend command on an SBOM. The SBOM is modified in-place. :param dict sbom: The SBOM model. - :param str path_to_license_folder: Path to a folder with license texts. + :param selected: List of operation classes to run on the SBOM. + :param config: Arguments for the operations. 
They will be passed to the operation's + __init__() method as kw-args. """ - for operation in __operations: - if type(operation) is ProcessLicense: - operation.change_path_to_license_folder(path_to_license_folder) - _prepare(sbom) - _metadata(sbom) - walk_components(sbom, _do_amend, skip_meta=True) + # If no operations are selected, select the default operations. + if not selected: + selected = [op for op in get_all_operations() if hasattr(op, "_amendDefault")] + + operations = create_operations(selected, config) + + _prepare(operations, sbom) + _metadata(operations, sbom) + walk_components(sbom, _do_amend, operations, skip_meta=True) -def _prepare(sbom: dict) -> None: - for operation in __operations: +def _prepare(operations: list[Operation], sbom: dict) -> None: + for operation in operations: operation.prepare(sbom) -def _metadata(sbom: dict) -> None: +def _metadata(operations: list[Operation], sbom: dict) -> None: if "metadata" not in sbom: return logger.debug("Processing metadata") metadata = sbom["metadata"] - for operation in __operations: + for operation in operations: operation.handle_metadata(metadata) -def _do_amend(component: dict) -> None: - for operation in __operations: +def _do_amend(component: dict, operations: list[Operation]) -> None: + for operation in operations: logger.debug( "Processing component %s", (component.get("bom-ref", "")) ) diff --git a/cdxev/amend/license.py b/cdxev/amend/license.py new file mode 100644 index 00000000..21dd07d9 --- /dev/null +++ b/cdxev/amend/license.py @@ -0,0 +1,47 @@ +from collections.abc import Callable + + +def license_has_id(license: dict) -> bool: + """ + Returns ``True`` if ``license`` contains an SPDX id. + + :param license: A license object. + :returns: ``True`` if ``license`` contains an SPDX id. + """ + return "id" in license + + +def license_has_text(license: dict) -> bool: + """ + Returns ``True`` if ``license`` contains a non-empty text. + + :param license: A license object. 
+ :returns: ``True`` if ``license`` contains a non-empty text. + """ + return "text" in license and license["text"]["content"] + + +def foreach_license(callable: Callable[[dict, dict], None], component: dict) -> None: + """ + Runs the given callable on every license contained in the given component. + + SPDX license expressions are not considered. Components declaring their licenses in this form + are skipped. + + For every other license, ``callable`` is invoked with the license object (i.e., the object + containing the ``id`` and ``name`` properties) and the component itself as arguments. + + :param callable: A callable object that can accept a license object as its first and the + declaring component as its second argument. + :param component: The component whose licenses to process. + """ + if "licenses" not in component: + return + + for license_container in component["licenses"]: + if "license" not in license_container: + # We don't do anything with SPDX expressions + continue + + license = license_container["license"] + callable(license, component) diff --git a/cdxev/amend/operations.py b/cdxev/amend/operations.py index 92556453..f046fdd3 100644 --- a/cdxev/amend/operations.py +++ b/cdxev/amend/operations.py @@ -1,19 +1,91 @@ """ This module defines the amend operations which can be performed on an SBOM. -It also declares a base class to inherit from when implementing new operations. + +A few general rules for operations: + +One operation, one task. **Do not** make a single operation do lots of different things. +This let's users of the tool decide for themselves which changes to make to their SBOM +by selecting the operations to run. + +Be mindful of an operation's impact when deciding to add the :py:func:`default` decorator. +Some operations are always safe to run. Others might introduce uncertainty to the SBOM. These are +a judgment call. Others again might add potentially false claims if used without thought. +**Do not** make these run by default. 
+ +Examples: +^^^^^^^^^ + +* *:py:class:`AddBomRef` is safe.* It never does anything to an SBOM that could change its + meaning. +* :py:class:`Compositions` introduces an intentional uncertainty about the completeness of the + SBOM's information. *We deem it okay to run by default because at worst it means the SBOM is + a little less expressive than it could be.* +* :py:class:`DeleteAmbiguousLicenses` introduces uncertainty about the completeness of the + license claims made for each component. It is meant to eliminate essentially useless + clutter but consumers of the SBOM could take the absence of license claims in the SBOM as + a sign that the component is not licensed. *So it should be used with caution and does not run + by default.* +* *:py:class:`InferCopyright` is dangerous*. It should only be used in controlled circumstances + - e.g., when it is known that no unintended components will be affected - because it could + add entirely false claims with legal relevance to the SBOM. + +Implementation notes +-------------------- + +If you want to add additional operations to the amend command, do it like this: + +#. Add a new class whose name is succinct and distinctive. It will be used in the CLI. +#. Subclass :py:class:`Operation`. +#. Override the methods defined in the base class, where necessary. +#. Add a docstring to your class. + + * It will be exposed on the CLI so **do not** add any formatting syntax. Stick to raw text. + * Keep the first line short and clear. It will be part of the general help for the *amend* + command. + +#. If your operation requires additional options provided by the user, add an `__init__()` method. + + * Add any option to `__init__()`'s parameter list. The parameter name will be used as a CLI + option. + * You MUST specify a default value. + * You MUST add a docstring to `__init__()` which describes the parameter. This description will + be visible in the command-line help text. + +#. 
If you want to add your operation to the default set, add the :py:func:`default` decorator. + See above about important considerations before doing so. + """ import datetime import importlib.resources import json import logging +import typing as t import uuid +from functools import cache +from pathlib import Path -from cdxev.amend.process_license import delete_license_unknown, process_license +import charset_normalizer + +from cdxev.amend.license import foreach_license, license_has_id, license_has_text +from cdxev.auxiliary.identity import ComponentIdentity +from cdxev.error import AppError +from cdxev.log import LogMessage logger = logging.getLogger(__name__) +def default(cls: type["Operation"]) -> type["Operation"]: + """ + Decorator to mark default operations. + + Add this decorator to a subclass of `Operation` to make it run if no operations + are explicitly selected. + """ + setattr(cls, "_amendDefault", True) + return cls + + class Operation: """ Base class for operations which modify the SBOM. @@ -38,12 +110,13 @@ def handle_component(self, component: dict) -> None: """ -class AddBomRefOperation(Operation): +@default +class AddBomRef(Operation): """ Adds a 'bom-ref' to components which don't have one yet. - Since the operation isn't easily able to determine the position of the component - it processes in the component tree, it generates UUIDs for bom-refs. + This operation generates bom-refs comprising a single UUIDv4 for any component which doesn't + have an SBOM. """ def handle_metadata(self, metadata: dict) -> None: @@ -58,37 +131,68 @@ def _add_bom_ref(self, component: dict) -> None: component["bom-ref"] = str(uuid.uuid4()) -class CompositionsOperation(Operation): +@default +class Compositions(Operation): """ - According to https://www.ntia.gov/files/ntia/publications/sbom_minimum_elements_report.pdf - "known unknowns" should be stated, as we can't guarantee completeness, - compositions should be marked as 'incomplete' for SBOMs. 
- This operation erases existing compositions and then adds all components as 'incomplete'. + Declares all component compositions as 'unknown'. + + Any existing entries in 'compositions' are replaced by a single entry that marks all + components in the SBOM as 'unknown'. This serves two goals: + - The NTIA recommends that known unknowns be made explicit. + https://www.ntia.gov/files/ntia/publications/sbom_minimum_elements_report.pdf + - It is safer to err on the side of caution when making claims about completeness. + + This excludes the metadata component because any SBOM supplier should be able to state the + level of completeness of its first-level components. """ - __assemblies: list + __compositions: list + __unknown_assemblies: list + __metacomp_aggregate: t.Optional[str] def prepare(self, sbom: dict) -> None: """ - Clears any existing compositions and creates an empty composition for "incomplete" + Clears any existing compositions and creates an empty composition for "unknown" assemblies. 
""" - if "compositions" in sbom: - del sbom["compositions"] - - sbom["compositions"] = [{"aggregate": "incomplete", "assemblies": []}] + metacomp = sbom.get("metadata", {}).get("component", {}).get("bom-ref", None) + self.__compositions = sbom.setdefault("compositions", []) + + # Remember the old aggregate of the metadata component + self.__metacomp_aggregate = next( + ( + comp["aggregate"] + for comp in self.__compositions + if metacomp in comp.get("assemblies", []) + ), + None, + ) - self.__assemblies = sbom["compositions"][0]["assemblies"] + # Replace any existing compositions with a new, empty list + self.__compositions.clear() + self.__compositions.append({"aggregate": "unknown", "assemblies": []}) + self.__unknown_assemblies = self.__compositions[0]["assemblies"] def handle_metadata(self, metadata: dict) -> None: + metacomp = metadata.get("component", {}).get("bom-ref", None) + if not metacomp or not self.__metacomp_aggregate: + return + try: - self.__add_to_assemblies(metadata["component"]["bom-ref"]) - except KeyError: - logger.debug( - "Cannot add meta-component to compositions because it has no bom-ref." + composition = next( + comp + for comp in self.__compositions + if comp["aggregate"] == self.__metacomp_aggregate ) - pass + assemblies = composition.setdefault("assemblies", []) + assemblies.append(metacomp) + except StopIteration: + composition = { + "aggregate": self.__metacomp_aggregate, + "assemblies": [metacomp], + } + self.__compositions.append(composition) def handle_component(self, component: dict) -> None: try: @@ -101,13 +205,12 @@ def handle_component(self, component: dict) -> None: def __add_to_assemblies(self, bom_ref: str) -> None: logger.debug("Added %s to compositions.", bom_ref) - self.__assemblies.append(bom_ref) + self.__unknown_assemblies.append(bom_ref) -class DefaultAuthorOperation(Operation): - """ - If the SBOM metadata doesn't declare an author, this operation sets the field to 'automated'. 
- """ +@default +class DefaultAuthor(Operation): + """Sets author of the metadata component to 'automated', if missing.""" def handle_metadata(self, metadata: dict) -> None: authors = metadata.setdefault("authors", []) @@ -116,72 +219,75 @@ def handle_metadata(self, metadata: dict) -> None: authors.append({"name": "automated"}) +@default class InferSupplier(Operation): """ - At least one of the 'author', 'supplier' or 'publisher' fields must be set on any component but - the supplier field is desired. - If not already present this function will, try to infer a 'supplier.name' - and 'supplier.url'. - The supplier name will be inferred from: - - - If a 'publisher' is present, it is used as supplier name. - - If no 'publisher but an 'author' is present, it is used as supplier name. + Attempts to infer component supplier from other fields. + + CycloneDX contains numerous attributes on components to attest some sort of responsibility for + its creation or distribution with fine semantic differences between them. These include + 'author', 'authors', 'manufacturer', 'supplier', or 'publisher' and the list might grow in + future versions. + Unfortunately, the toolscape doesn't work equally well with all of these. For instance, + Dependency-Track ignores everything but 'author' and 'supplier'. + However, SBOMs generated by many tools do not always expose this information for all + components. Where it is missing, this operation attempts to infer a 'supplier' from available + data. + + The algorithm sets the 'supplier.name' to the first element found from the following list: + - 'publisher' + - 'author' The 'supplier.url' will be inferred from the following sources, in order of precedence: + - 'externalReference' of type 'website' + - 'externalReference' of type 'issue-tracker' + - 'externalReference' of type 'vcs' - - If an 'externalReference' of type 'website' is present, it is used as supplier URL. 
- - If an 'externalReference' of type 'issue-tracker' is present, it is used as supplier URL. - - If an 'externalReference' of type 'vcs' is present, it is used as supplier URL. - - For all of the URLs there is the additional condition that they must utilize the http or https - scheme. + For all of the URLs there is the additional condition that they must utilize either the 'http' + or 'https' scheme. """ def infer_supplier(self, component: dict) -> None: - if "url" not in component.get("supplier", {}): - - if "externalReferences" in component: - accepted_references = ("website", "issue-tracker", "vcs") - accepted_url_schemes = ("http://", "https://") - for key in accepted_references: - ext_ref = next( - ( - x - for x in component["externalReferences"] - if x.get("type") == key - ), - None, + if "supplier" in component: + return + + supplier = {} + + if "externalReferences" in component: + accepted_references = ("website", "issue-tracker", "vcs") + accepted_url_schemes = ("http://", "https://") + for key in accepted_references: + ext_ref = next( + ( + x + for x in component["externalReferences"] + if x.get("type") == key + ), + None, + ) + if ext_ref is not None and ( + any( + ext_ref["url"].startswith(scheme) + for scheme in accepted_url_schemes + ) + ): + supplier["url"] = [ext_ref["url"]] + logger.debug( + "Set supplier of %s to URL: %s", + component.get("bom-ref", ""), + ext_ref["url"], ) - if ext_ref is not None and ( - any( - ext_ref["url"].startswith(scheme) - for scheme in accepted_url_schemes - ) - ): - component["supplier"] = component.get("supplier", {}) - component["supplier"]["url"] = [ext_ref["url"]] - logger.debug( - "Set supplier of %s to URL: %s", - component.get("bom-ref", ""), - ext_ref["url"], - ) - break - - if "name" not in component.get("supplier", {}): - - if "publisher" in component: - component["supplier"] = component.get("supplier", {}) - component["supplier"]["name"] = component["publisher"] - return - - if "author" in component: - 
component["supplier"] = component.get("supplier", {}) - component["supplier"]["name"] = component["author"] - return - - def handle_component( - self, component: dict, path_to_license_folder: str = "" - ) -> None: + break + + if "publisher" in component: + supplier["name"] = component["publisher"] + elif "author" in component: + supplier["name"] = component["author"] + + if supplier: + component["supplier"] = supplier + + def handle_component(self, component: dict) -> None: self.infer_supplier(component) def handle_metadata(self, metadata: dict) -> None: @@ -189,55 +295,153 @@ def handle_metadata(self, metadata: dict) -> None: self.infer_supplier(component) -class ProcessLicense(Operation): +@default +class LicenseNameToId(Operation): """ - If there are components in "metadata" or "components" containing - licenses with the entry "name" instead of "id", this operation attempts - to replace the name with an SPDX-ID, extracted from a provided list of possible license names - with associated SPDX-ID. - - If the license contains a name and - a path to a folder with txt files containing license descriptions with the - naming convention 'license name'.txt is provided, - the program searches for a file with matching name - and, if found, copies its content in the field "text". + Attempts to infer SPDX ids from license names. + + For any license on a component or the metadata component that is declared with a name but no + id, this operation attempts to replace the name with a matching SPDX id. The operation + contains a lookup table of common license names to SPDX ids largely sourced from + https://github.com/CycloneDX/cyclonedx-core-java/ and https://spdx.org/licenses/. + + Licenses that already have an id are skipped. If no corresponding id can be found, the license + is also skipped. 
""" - list_of_license_names_string = ( - importlib.resources.files("cdxev.amend") - .joinpath("license_name_spdx_id_map.json") - .read_text(encoding="utf-8-sig") - ) - list_of_license_names = json.loads(list_of_license_names_string) + license_map: dict[str, str] = {} + + def prepare(self, sbom: dict) -> None: + license_mapping_file = ( + importlib.resources.files(__spec__.parent) / "license_name_spdx_id_map.json" # type: ignore[name-defined] # noqa: E501 + ) + license_mapping_json = license_mapping_file.read_text(encoding="utf-8-sig") + license_mapping = json.loads(license_mapping_json) + for mapping in license_mapping: + for name in mapping["names"]: + self.license_map[name.lower()] = mapping["exp"] + + def _do_it(self, license: dict, component: dict) -> None: + if license_has_id(license): + return + + name = license["name"].lower() + if name not in self.license_map: + return - def __init__(self) -> None: - self.path_to_license_folder = "" + id = self.license_map[name] + license["id"] = id + del license["name"] - def change_path_to_license_folder(self, path_to_license_folder: str) -> None: - self.path_to_license_folder = path_to_license_folder + component_id = ComponentIdentity.create(component, True) + logger.info( + LogMessage( + "License name replaced with id", + f"License '{name}' of component {component_id} replaced with id '{id}'", + ) + ) def handle_metadata(self, metadata: dict) -> None: if "component" not in metadata: return - process_license( - metadata["component"], - self.list_of_license_names, - self.path_to_license_folder, - ) - delete_license_unknown(metadata["component"]) + + foreach_license(self._do_it, metadata["component"]) def handle_component(self, component: dict) -> None: - process_license( - component, self.list_of_license_names, self.path_to_license_folder - ) - delete_license_unknown(component) + foreach_license(self._do_it, component) + + +class AddLicenseText(Operation): + """ + Adds user-provided license texts to licenses with a 
specific name (not id). + + When using this operation, the user must also specify a directory where license texts are + stored. + Texts are expected in one file per license, where the filename must match the license name + declared in the SBOM. The filename's extension is ignored or might even be missing. + + This operation skips licenses with an SPDX id as well as licenses which already contain a text. + """ + + license_files: dict[str, Path] = {} + """Maps filenames to path.""" + aliases: dict[str, str] = {} + """Maps filename without extension to full filename.""" + + def __init__(self, license_dir: Path) -> None: + """ + :param license_dir: Path to a folder with files containing license texts. + """ + self.license_dir = license_dir + + def _add_text(self, license: dict, text: str) -> None: + license["text"] = {"content": text} + + @cache + def _find_text(self, license_name: str) -> t.Optional[str]: + if license_name in self.aliases: + license_name = self.aliases[license_name] + + if license_name not in self.license_files: + return None + + file = self.license_files[license_name] + match = charset_normalizer.from_path(file).best() + if match is None: + raise AppError("File encoding cannot be determined", module_name=str(file)) + text = str(match) + # Escape string for inclusion in json. 
The slice is to remove the surrounding + # double-quotes added by json.dumps() + return json.dumps(text)[1:-1] + + def _do_it(self, license: dict, component: dict) -> None: + if license_has_id(license) or license_has_text(license): + return + + name = license["name"] + text = self._find_text(name.lower()) + if text: + component_id = ComponentIdentity.create(component, True) + logger.info( + LogMessage( + "License text added", + f"Added text of license '{name}' to component {component_id}", + ) + ) + self._add_text(license, text) + + def prepare(self, sbom: dict) -> None: + if not self.license_dir.is_dir(): + raise AppError( + "License directory not found", + "Not found or not a directory: " + str(self.license_dir), + ) + + listing = (file for file in self.license_dir.glob("*") if file.is_file()) + for file in listing: + self.license_files[file.name.lower()] = file + self.aliases[file.stem.lower()] = file.name.lower() + + def handle_metadata(self, metadata: dict) -> None: + if "component" not in metadata: + return + + foreach_license(self._do_it, metadata["component"]) + + def handle_component(self, component: dict) -> None: + foreach_license(self._do_it, component) class InferCopyright(Operation): """ - If neither a license nor a copyright is present in a component, - this function will create a 'copyright' field in the schema - 'supplier.name year, all rights reserved' + Attempts to infer copyright claims from supplier. + + If neither copyright nor license is present on a component but there is a supplier, + this operation generates the copyright field from the supplier in the format + `Copyright (c) , all rights reserved`. + + Because of the risk of generating false copyright claims, this operation is disabled by + default. 
""" def infer_copyright(self, component: dict) -> None: @@ -254,12 +458,69 @@ def infer_copyright(self, component: dict) -> None: supplier_name = component.get("supplier", {}).get("name", "") copyright = f"Copyright (c) {year} {supplier_name}" component["copyright"] = copyright + component_id = ComponentIdentity.create(component, True) + logger.info(f"Copyright claim '{copyright}' added to {component_id}.") - def handle_component( - self, component: dict, path_to_license_folder: str = "" - ) -> None: + def handle_component(self, component: dict) -> None: self.infer_copyright(component) def handle_metadata(self, metadata: dict) -> None: component = metadata.get("component", {}) self.infer_copyright(component) + + +class DeleteAmbiguousLicenses(Operation): + """ + Deletes license claims which are solely identified by the `name` property. + + Licenses that contain only a name property but no URL or text for context provide little + informational value beyond the fact that some form of license is present. + In certain cases it can therefore be beneficial to remove such clutter from an SBOM. + + Because of the risk involved in accidentally removing important data, this operation is + disabled by default. + """ + + def _has_text(self, license: dict) -> bool: + return license.get("text", {}).get("content", "") != "" + + def _has_url(self, license: dict) -> bool: + return license.get("url", "") != "" + + def _has_name_only(self, license: dict) -> bool: + # Any fields other than name, text, or url mean the license shouldn't be deleted. + if any(field not in ["name", "text", "url"] for field in license.keys()): + return False + + # Make sure that, if name or url are present, they aren't empty. 
+ return not (self._has_text(license) or self._has_url(license)) + + def _keep_license(self, license: dict) -> bool: + if "license" not in license: + return True + + return not self._has_name_only(license["license"]) + + def _filter_licenses(self, component: dict) -> None: + if "licenses" not in component: + return + + licenses = component["licenses"] + licenses = list( + filter( + self._keep_license, + licenses, + ) + ) + if licenses: + component["licenses"] = licenses + else: + del component["licenses"] + + def handle_metadata(self, metadata: dict) -> None: + if "component" not in metadata: + return + self._filter_licenses(metadata["component"]) + + def handle_component(self, component: dict) -> None: + self._filter_licenses(component) diff --git a/cdxev/amend/process_license.py b/cdxev/amend/process_license.py deleted file mode 100644 index f51bde07..00000000 --- a/cdxev/amend/process_license.py +++ /dev/null @@ -1,245 +0,0 @@ -################################################## -# Function adds the id to a Software Bill of Materials, if only the name is given and the -# given name is found in the reference list of possible names -################################################## - -import logging -import os -import re -from typing import Sequence - -from cdxev.auxiliary.identity import ComponentIdentity -from cdxev.error import AppError -from cdxev.log import LogMessage - -logger = logging.getLogger(__name__) - - -def find_license_id(license_name: str, license_namelist: Sequence[dict]) -> str: - """ - Searches in the given list for the name and returns the - SPDX-ID of the license, if existing. - - Parameters - ---------- - license_name: str - Name of a license. - license_namelist: list - Possible names of licenses and the SPDX-ID. - - Returns - ------- - str: - SPDX-ID of the given string. 
- """ - license_id = "" - if isinstance(license_name, str): - for dicts in license_namelist: - if license_name.lower() == dicts.get("exp", "").lower(): - license_id = dicts.get("exp", "") - else: - for name in dicts.get("names", []): - if license_name.lower() == name.lower(): - license_id = dicts.get("exp", "") - return license_id - - -def process_license( - component: dict, license_name_id_list: list, path_to_license_folder: str = "" -) -> None: - """ - Adds the SPDX-ID of a license to a component and removes the name, if the name - is in the list of licenses provided. - - If the path to a folder with txt files containing license descriptions with the - naming convention 'license name'.txt is given, - the program searches for a file with matching name - and, if found, copies its content in the field "text". - - The operation is performed on the provided component. - - Parameters - ---------- - :component: dict - A component. - :license_name_id_map: list - A list with possible license names - belonging to a license with SPDX-ID. - :path_to_license_folder: str (optional) - Path to a folder with txt files containing license texts. - - Returns - ------- - - """ - licenses = component.get("licenses", []) - if not licenses: - return - - component_id = ComponentIdentity.create(component, allow_unsafe=True) - - for license in licenses: - if "license" not in license: - continue - - current_license = license.get("license", {}) - if "id" in current_license: - continue - - replace_license_name_with_id(current_license, license_name_id_list) - add_text_from_folder_to_license_with_name( - current_license, path_to_license_folder, component_id - ) - - return - - -def replace_license_name_with_id(license: dict, license_name_id_list: list) -> None: - """ - Adds the SPDX-ID of a license to a license and removes the name, if the name - is in the list of licenses provided. - - The operation is performed on the provided license. - - Parameters - ---------- - :license: dict - A license. 
- :license_name_id_map: list - A list with possible license names and - belonging to a license id. - - Returns - ------- - - """ - if "id" in license: - return - - id_found = find_license_id(license.get("name", ""), license_name_id_list) - if id_found: - license["id"] = id_found - license.pop("name") - return - - -def add_text_from_folder_to_license_with_name( - license: dict, - path_to_license_folder: str = "", - component_id: ComponentIdentity = ComponentIdentity.create({}, allow_unsafe=True), -) -> None: - """ - Adds the text describing a license, - if the provided folder contains a corresponding txt-file with the text of the license. - The txt-file has to follow the naming convention 'license name'.txt. - - The operation is performed on the provided license. - - Parameters - ---------- - :license: dict - A license. - :path_to_license_folder: str - The path to a folder with txt-files containing license descriptions. - :component_id (optional): ComponentIdentity - The ComponentIdentity of the component the submitted license belongs to. - - Returns - ------- - - """ - if path_to_license_folder and license.get("name", ""): - license_text = get_license_text_from_folder( - license.get("name", ""), path_to_license_folder - ) - if license_text == "": - logger.warning( - LogMessage( - "License text not found", - ( - f"No text for the license ({license.get('name', '')}), " - f"in component ({component_id}), was found. " - "An empty string was added as text." - ), - ) - ) - else: - if license.get("text", {}).get("content", "") != "": - logger.warning( - LogMessage( - "License text replaced", - ( - f"The license text of the license ({license.get('name', '')})," - f" in component ({component_id}), was overwritten." - ), - ) - ) - logger.info( - LogMessage( - "License text added", - ( - f"The text of the license ({license.get('name', '')})," - f" in component ({component_id}), was added." 
- ), - ) - ) - license["text"] = {"content": license_text} - return - - -def get_license_text_from_folder(license_name: str, path_to_license_folder: str) -> str: - """ - Searches in given folder for a txt-file with the name of of a given license and - returns the file's content as a string. - - Parameters - ---------- - :license_name: str - Name of the license. - :path_to_license_folder: str - Path to a folder with txt-files containing license descriptions. - - Returns - ------- - str : the content of the file. - """ - if os.path.isdir(path_to_license_folder): - file_name = license_name + ".txt" - for licenses_text_file in os.listdir(path_to_license_folder): - if licenses_text_file == file_name: - with open(os.path.join(path_to_license_folder, file_name)) as f: - license_text = f.read() - return license_text - return "" - else: - if not os.path.exists(path_to_license_folder): - raise AppError( - "Invalid path to license folder", - (f"The submitted path ({path_to_license_folder})" " does not exist."), - ) - else: - raise AppError( - "Invalid path to license folder", - ( - f"The submitted path ({path_to_license_folder})" - " does not lead to a folder." 
- ), - ) - - -def delete_license_unknown(component: dict) -> None: - if not component.get("licenses", {}): - return - - regex = re.compile(r"[Uu][Nn][Kk][Nn][Oo][Ww][Nn]") - licenses_filtered = [] - for license in component.get("licenses", []): - if license.get("license", {}).get("text", {}).get("content", ""): - licenses_filtered.append(license) - - elif not regex.search(license.get("license", {}).get("name", "false")): - licenses_filtered.append(license) - if licenses_filtered: - component["licenses"] = licenses_filtered - else: - component.pop("licenses") diff --git a/docs/available_commands.md b/docs/available_commands.md index b4b5a1cd..dc7afbf5 100644 --- a/docs/available_commands.md +++ b/docs/available_commands.md @@ -8,24 +8,13 @@ Before use, please consider the [known limitations](https://festo-se.github.io/c ## amend -This command accepts a single input file and will add any missing fields to it which can be automatically inferred. - -Currently, the command adds or modifies the following pieces of information: - -* If the SBOM metadata doesn't specify an *author* from the SBOM, it will be set to `{"name": "automated"}`. -* The *compositions* array will be overwritten with a new one which specifies a single *incomplete* aggregate. This aggregate contains all components, including the metadata component. -* If a component does have a publisher and/or author but does not have a *supplier*, the tool will try to infer the `supplier.name` from the fields (in order of precedence): - * *publisher* - * *author* -* If a component contains externalReferences and no supplier.url is provided, the tool will try to infer the *supplier.url* from (in order of precedence): - * *externalReferences* of type *website* - * *externalReferences* of type *issue-tracker* - * *externalReferences* of type *vcs* -* Generates a *bom-ref* for components which don't have one, yet. The *bom-ref* will be a GUID. 
-* If the path to a folder with license text files is provided, the text will be included in the SBOM, if the license has the corresponding `name`. -* If a `license.name` is similar to an SPDX-ID, it will be replaced, e.g. `{"license": {"name": "The Apache License, Version 2.0"}}` leads to `{"license": {"id": "Apache-2.0"}}`. For this purpose a [JSON-file](https://github.com/Festo-se/cyclonedx-editor-validator/blob/main/cdxev/amend/license_name_spdx_id_map.json) is used, where we provide a mapping of license names to SPDX-IDs, based on this [license-mapping](https://github.com/CycloneDX/cyclonedx-core-java/blob/master/src/main/resources/license-mapping.json). -* If the SBOM contains a license which `name` includes any variation of the letter sequence "unknown" and no or an empty `text`, the license will be removed. Empty `licenses` fields will also be removed. -* If neither `licenses` nor `copyright` exist but a `supplier.name` is present, the tool will create a `copyright` with the content "Copyright (c) `current year` `supplier.name`". For example "Copyright (c) 2024 Acme Inc." is created from the `{"supplier":{"name": "Acme Inc."}}` in the year 2024. +This command accepts a single input file and will apply one or multiple *operations* to it. Each operation modifies certain aspects of the SBOM. These modifications cannot be targeted at individual components in the SBOM, which sets the *amend* command apart from *set*. Its use case is ensuring an SBOM fulfils certain requirements in an automated fashion. + +See the command help with `cdx-ev amend --help` for a list of available operations. All operations marked `[default]` will run unless the command-line option `--operation` is provided. + +For more information on a particular operation, use the `cdx-ev amend --help-operation <operation>` command. + +Note that the order of operations cannot be controlled.
If you want to ensure two operations run in a certain order you must run the command twice, each time with a different set of operations. ### Copy license texts from files diff --git a/poetry.lock b/poetry.lock index 044f966d..0e74e0b8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -119,6 +119,105 @@ files = [ {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, ] +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = 
"charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = 
"charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = 
"charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = 
"sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = 
"sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = 
"charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + [[package]] name = "click" version = "8.1.7" @@ -219,6 +318,17 @@ files = [ {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, ] +[[package]] +name = "docstring-parser" +version = "0.16" +description = "Parse Python docstrings in reST, Google and Numpydoc format" +optional = false +python-versions = ">=3.6,<4.0" +files = [ + {file = 
"docstring_parser-0.16-py3-none-any.whl", hash = "sha256:bf0a1387354d3691d102edef7ec124f219ef639982d096e26e3b60aeffa90637"}, + {file = "docstring_parser-0.16.tar.gz", hash = "sha256:538beabd0af1e2db0146b6bd3caa526c35a34d61af9fd2887f3a8a27a739aa6e"}, +] + [[package]] name = "exceptiongroup" version = "1.2.0" @@ -1057,4 +1167,4 @@ tests = ["pytest", "pytest-cov"] [metadata] lock-version = "2.0" python-versions = "^3.9.0" -content-hash = "e5ec914723690322631ba9aa7afe363749860a274627d49d8d09893cde02f3ea" +content-hash = "03c957b2eeca1b07bf0d1fc637f10e6d725c5c7bd0b23a1dfa105c0fddab38bb" diff --git a/pyproject.toml b/pyproject.toml index aef2223c..584266e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,8 @@ cdx-ev = "cdxev.__main__:main" python = "^3.9.0" python-dateutil = "2.9.0.post0" jsonschema = {version = "4.22.0", extras = ["format"]} +docstring-parser = "^0.16" +charset-normalizer = "^3.3.2" [tool.poetry.group.dev.dependencies] flake8 = "7.0.0" diff --git a/tests/auxiliary/licenses/Apache-1.0.txt b/tests/auxiliary/licenses/Apache-1.0.txt new file mode 100644 index 00000000..95954498 --- /dev/null +++ b/tests/auxiliary/licenses/Apache-1.0.txt @@ -0,0 +1 @@ +Dummy text \ No newline at end of file diff --git a/tests/auxiliary/licenses/UPPERCASE.license b/tests/auxiliary/licenses/UPPERCASE.license new file mode 100644 index 00000000..aff40481 --- /dev/null +++ b/tests/auxiliary/licenses/UPPERCASE.license @@ -0,0 +1 @@ +UPPERCASE LICENSE \ No newline at end of file diff --git a/tests/auxiliary/licenses/license_name.txt b/tests/auxiliary/licenses/license_name similarity index 100% rename from tests/auxiliary/licenses/license_name.txt rename to tests/auxiliary/licenses/license_name diff --git a/tests/auxiliary/test_amend_sboms/test.cdx.json b/tests/auxiliary/test_amend_sboms/test.cdx.json index 29c705cb..43cebd3f 100644 --- a/tests/auxiliary/test_amend_sboms/test.cdx.json +++ b/tests/auxiliary/test_amend_sboms/test.cdx.json @@ -17,8 +17,13 @@ "name": 
"test-app", "version": "1.0.0", "bom-ref": "pkg:npm/test-app@1.0.0", - "author": "Company SE", - "purl": "pkg:npm/test-app@1.0.0" + "purl": "pkg:npm/test-app@1.0.0", + "externalReferences": [ + { + "type": "website", + "url": "https://www.company.org" + } + ] } }, "components": [ @@ -185,6 +190,12 @@ { "aggregate": "incomplete", "assemblies": ["com.company.unit/depA@4.0.2"] + }, + { + "aggregate": "not_specified", + "assemblies": [ + "pkg:npm/test-app@1.0.0" + ] } ] } diff --git a/tests/test_amend.py b/tests/test_amend.py index 88fcca29..761cdd96 100644 --- a/tests/test_amend.py +++ b/tests/test_amend.py @@ -3,114 +3,106 @@ import json import typing as t import unittest +from pathlib import Path -from cdxev.amend import process_license from cdxev.amend.command import run as run_amend from cdxev.amend.operations import ( - AddBomRefOperation, - CompositionsOperation, - DefaultAuthorOperation, + AddBomRef, + AddLicenseText, + Compositions, + DefaultAuthor, + DeleteAmbiguousLicenses, InferCopyright, InferSupplier, + LicenseNameToId, Operation, - ProcessLicense, ) from cdxev.error import AppError -from tests.auxiliary.sbomFunctionsTests import compare_sboms path_to_folder_with_test_sboms = "tests/auxiliary/test_amend_sboms/" class AmendTestCase(unittest.TestCase): + operation: Operation + def setUp(self) -> None: with open( path_to_folder_with_test_sboms + "test.cdx.json", encoding="utf_8" ) as file: self.sbom_fixture = json.load(file) + def test_no_metadata_component_doesnt_raise(self) -> None: + if not hasattr(self, "operation"): + self.skipTest("Skipped on abstract base test case") -class CommandIntegrationTestCase(AmendTestCase): - def test_compositions(self) -> None: - run_amend(self.sbom_fixture) - - expected_assemblies = [ - "pkg:npm/test-app@1.0.0", - "com.company.unit/depA@4.0.2", - "some-vendor/depB@1.2.3", - "some-vendor/depB@1.2.3:physics/gravity@0.0.1", - "some-vendor/depB@1.2.3:physics/x-ray@18.9.5", - 
"some-vendor/depB@1.2.3:physics/x-ray@18.9.5:Rudolph@6.6.6", - "depC@3.2.1", - "depC@3.2.1:Rudolph@6.6.6", - ] - expected_assemblies.sort() - self.sbom_fixture["compositions"][0]["assemblies"].sort() - self.assertSequenceEqual( - self.sbom_fixture["compositions"][0]["assemblies"], - expected_assemblies, - ) - - def test_meta_author(self) -> None: - run_amend(self.sbom_fixture) + del self.sbom_fixture["metadata"]["component"] + self.operation.handle_metadata(self.sbom_fixture["metadata"]) - self.assertSequenceEqual( - self.sbom_fixture["metadata"]["authors"], [{"name": "automated"}] - ) + def test_empty_component_doesnt_raise(self) -> None: + if not hasattr(self, "operation"): + self.skipTest("Skipped on abstract base test case") - def test_suppliers(self) -> None: - run_amend(self.sbom_fixture) - components = self.sbom_fixture["components"] - self.assertIn("supplier", components[0]) - self.assertDictEqual( - { - "name": "Some Vendor Inc.", - "url": ["https://www.some-vendor.com"], - }, - components[1]["supplier"], - ) - self.assertDictEqual( - {"url": ["https://www.universe.com"]}, - components[1]["components"][0]["supplier"], - ) - self.assertNotIn("supplier", components[1]["components"][1]) - self.assertDictEqual( - {"url": ["https://northpole.com/rudolph.git"]}, - components[1]["components"][1]["components"][0]["supplier"], - ) - self.assertDictEqual( - { - "name": "Some Vendor Inc.", - "url": ["https://www.some-vendor.com"], - }, - components[1]["supplier"], - ) - self.assertNotIn("supplier", components[2]) + self.operation.handle_component({}) class CompositionsTestCase(AmendTestCase): def setUp(self) -> None: super().setUp() - self.operation = CompositionsOperation() + self.operation = Compositions() def test_compositions_cleared(self) -> None: self.operation.prepare(self.sbom_fixture) self.assertSequenceEqual( self.sbom_fixture["compositions"], - [{"aggregate": "incomplete", "assemblies": []}], + [{"aggregate": "unknown", "assemblies": []}], ) - def 
test_meta_component_added(self) -> None: + def test_meta_component_keeps_aggregate(self) -> None: self.operation.prepare(self.sbom_fixture) self.operation.handle_metadata(self.sbom_fixture["metadata"]) - self.assertSequenceEqual( - self.sbom_fixture["compositions"][0]["assemblies"], - ["pkg:npm/test-app@1.0.0"], + self.assertTrue( + any( + comp["aggregate"] == "not_specified" + and comp["assemblies"] == ["pkg:npm/test-app@1.0.0"] + for comp in self.sbom_fixture["compositions"] + ) + ) + + def test_meta_component_keeps_unknown_aggregate(self) -> None: + self.sbom_fixture["compositions"][2]["aggregate"] = "unknown" + self.operation.prepare(self.sbom_fixture) + self.operation.handle_metadata(self.sbom_fixture["metadata"]) + + self.assertTrue( + self.sbom_fixture["metadata"]["component"]["bom-ref"] + in self.sbom_fixture["compositions"][0]["assemblies"] + ) + + def test_meta_component_missing(self) -> None: + del self.sbom_fixture["metadata"]["component"] + self.operation.prepare(self.sbom_fixture) + self.operation.handle_metadata(self.sbom_fixture["metadata"]) + + # Assert that all compositions are empty + self.assertFalse( + any(comp["assemblies"] for comp in self.sbom_fixture["compositions"]) + ) + + def test_meta_component_not_in_compositions(self) -> None: + del self.sbom_fixture["compositions"][2] + self.operation.prepare(self.sbom_fixture) + self.operation.handle_metadata(self.sbom_fixture["metadata"]) + + # Assert that all compositions are empty + self.assertFalse( + any(comp["assemblies"] for comp in self.sbom_fixture["compositions"]) ) def test_components_added(self) -> None: self.operation.prepare(self.sbom_fixture) flat_walk_components(self.operation, self.sbom_fixture["components"]) + self.assertEqual(self.sbom_fixture["compositions"][0]["aggregate"], "unknown") self.assertSequenceEqual( self.sbom_fixture["compositions"][0]["assemblies"], ["com.company.unit/depA@4.0.2", "some-vendor/depB@1.2.3", "depC@3.2.1"], @@ -120,7 +112,7 @@ def 
test_components_added(self) -> None: class DefaultAuthorTestCase(AmendTestCase): def setUp(self) -> None: super().setUp() - self.operation = DefaultAuthorOperation() + self.operation = DefaultAuthor() def test_default_author_added(self) -> None: self.operation.handle_metadata(self.sbom_fixture["metadata"]) @@ -162,69 +154,24 @@ def test_supplier_already_present(self) -> None: def test_publisher_is_preferred_to_author(self) -> None: component = {"author": "x", "publisher": "y"} - expected = {"author": "x", "publisher": "y", "supplier": {"name": "y"}} + expected = {"author": "x", "supplier": {"name": "y"}, "publisher": "y"} self.operation.handle_component(component) self.assertDictEqual(expected, component) - def test_author_set_supplier_in_metadata(self) -> None: - run_amend(self.sbom_fixture) - self.assertEqual( - self.sbom_fixture["metadata"]["component"]["supplier"]["name"], - self.sbom_fixture["metadata"]["component"]["author"], - ) - - def test_author_set_supplier_components(self) -> None: - self.sbom_fixture["components"][0].pop("externalReferences") - run_amend(self.sbom_fixture) - self.assertEqual( - self.sbom_fixture["components"][0]["supplier"]["name"], - self.sbom_fixture["components"][0]["author"], - ) - - def test_supplier_get_not_overwritten(self) -> None: - self.sbom_fixture["components"][0]["supplier"] = { - "bom-ref": "Reference to a supplier entry", - "name": "Some name of a supplier", - "url": "https://someurl.com", - } - run_amend(self.sbom_fixture) - self.assertEqual( - self.sbom_fixture["components"][0]["supplier"]["name"], - "Some name of a supplier", - ) - self.assertEqual( - self.sbom_fixture["components"][0]["supplier"]["url"], "https://someurl.com" - ) - self.assertEqual( - self.sbom_fixture["components"][0]["supplier"]["bom-ref"], - "Reference to a supplier entry", - ) - - def test_supplier_add_url_to_name(self) -> None: - self.sbom_fixture["components"][0]["supplier"] = { - "name": "Some name of a supplier" - } - 
run_amend(self.sbom_fixture) - self.assertEqual( - self.sbom_fixture["components"][0]["supplier"]["name"], - "Some name of a supplier", - ) - self.assertEqual( - self.sbom_fixture["components"][0]["supplier"]["url"][0], - self.sbom_fixture["components"][0]["externalReferences"][0]["url"], - ) + def test_empty_component_stays_empty(self): + component = {} + expected = {} + self.operation.handle_component(component) + self.assertDictEqual(component, expected) - def test_supplier_set_nothing_in_an_empty_component(self) -> None: - self.sbom_fixture["components"][0] = {"bom-ref": "component 0"} + def test_author_set_supplier_in_metadata(self) -> None: + expected = copy.deepcopy(self.sbom_fixture["metadata"]["component"]) + expected["supplier"] = {"url": ["https://www.company.org"]} run_amend(self.sbom_fixture) - self.assertEqual(self.sbom_fixture["components"][0], {"bom-ref": "component 0"}) - def test_supplier_add_name_to_url(self) -> None: - self.sbom_fixture["components"][0]["supplier"] = {"url": "https://someurl.com"} - run_amend(self.sbom_fixture) self.assertEqual( - self.sbom_fixture["components"][0]["supplier"]["name"], - self.sbom_fixture["components"][0]["author"], + self.sbom_fixture["metadata"]["component"], + expected, ) def test_supplier_from_website(self) -> None: @@ -260,11 +207,25 @@ def test_supplier_from_vcs(self) -> None: self.operation.handle_component(component) self.assertDictEqual(expected, component) + def test_add_name_and_url(self) -> None: + component = { + "author": "Author", + "externalReferences": [ + {"type": "vcs", "url": "https://y.com"}, + {"type": "website", "url": "https://x.com"}, + ], + } + expected = copy.deepcopy(component) | { + "supplier": {"name": "Author", "url": ["https://x.com"]} + } + self.operation.handle_component(component) + self.assertDictEqual(expected, component) + class AddBomRefTestCase(AmendTestCase): def setUp(self) -> None: super().setUp() - self.operation = AddBomRefOperation() + self.operation = AddBomRef() 
def test_add_bom_ref_to_metadata(self) -> None: metadata = {"component": {"type": "application", "name": "test"}} @@ -293,59 +254,33 @@ def test_dont_overwrite(self) -> None: self.assertEqual("already-present", component["bom-ref"]) -class ProcessLicenseTestCase(AmendTestCase): +class LicenseNameToIdTestCase(AmendTestCase): def setUp(self) -> None: super().setUp() - self.operation = ProcessLicense() + self.operation = LicenseNameToId() + self.operation.prepare(self.sbom_fixture) def test_replace_name_with_id(self) -> None: - self.sbom_fixture["metadata"]["component"]["licenses"] = [ - {"license": {"name": "Apache License"}} - ] - self.operation.handle_metadata(self.sbom_fixture["metadata"]) - self.assertEqual( - self.sbom_fixture["metadata"]["component"]["licenses"][0]["license"]["id"], - "Apache-1.0", - ) - self.sbom_fixture["components"][0]["licenses"] = [ - {"license": {"name": "Apache License"}} - ] - self.operation.handle_component(self.sbom_fixture["components"][0]) - self.assertEqual( - self.sbom_fixture["components"][0]["licenses"][0]["license"]["id"], - "Apache-1.0", - ) + component = { + "licenses": [ + {"license": {"name": "Apache License"}}, + {"license": {"name": "GNU Lesser General Public License, Version 2.1"}}, + {"license": {"name": "Some random name"}}, + ] + } + expected = { + "licenses": [ + {"license": {"id": "Apache-1.0"}}, + {"license": {"id": "LGPL-2.1-only"}}, + {"license": {"name": "Some random name"}}, + ] + } + self.operation.handle_component(component) + self.assertDictEqual(component, expected) - def test_no_component_in_metadata(self) -> None: - exception_thrown = False - test_sbom = copy.deepcopy(self.sbom_fixture) - test_sbom["metadata"].pop("component") - try: - self.operation.handle_metadata(test_sbom["metadata"]) - except KeyError: - exception_thrown = True - self.assertFalse(exception_thrown) - - def test_no_name_and_no_id_in_license(self) -> None: - exception_thrown = False - test_sbom = copy.deepcopy(self.sbom_fixture) - 
test_sbom["components"][0]["licenses"] = [{}] - try: - self.operation.handle_component(test_sbom["components"][0]) - except KeyError: - exception_thrown = True - self.operation.handle_component(test_sbom["components"][0]) - self.assertFalse(exception_thrown) - - def test_empty_component(self) -> None: - exception_thrown = False - test_sbom = copy.deepcopy(self.sbom_fixture) - test_sbom["components"] = [{}] - try: - self.operation.handle_component(test_sbom["components"][0]) - except KeyError: - exception_thrown = True - self.assertFalse(exception_thrown) + def test_no_name_and_no_id_in_license_doesnt_raise(self) -> None: + self.sbom_fixture["components"][0]["licenses"] = [{}] + self.operation.handle_component(self.sbom_fixture["components"][0]) def flat_walk_components( @@ -359,386 +294,103 @@ def flat_walk_components( operation.handle_component(c) -class TestReplaceLicenseNameWithIdFunctions(unittest.TestCase): - def test_find_license_id(self) -> None: - with open( - (path_to_folder_with_test_sboms + "/example_list_with_license_names.json"), - "r", - encoding="utf-8-sig", - ) as my_file: - list_of_license_names = json.load(my_file) - for licenses in list_of_license_names: - license_id = licenses["exp"] - for names in licenses["names"]: - self.assertEqual( - process_license.find_license_id(names, list_of_license_names), - license_id, - ) - - def test_find_license_id_fail(self) -> None: - with open( - (path_to_folder_with_test_sboms + "example_list_with_license_names.json"), - "r", - encoding="utf-8-sig", - ) as my_file: - list_of_license_names = json.load(my_file) - self.assertEqual( - process_license.find_license_id(10, list_of_license_names), # type: ignore - "", - ) - self.assertEqual( - process_license.find_license_id("no license", list_of_license_names), "" - ) - self.assertEqual( - process_license.find_license_id({}, list_of_license_names), # type: ignore - "", - ) - - def test_process_license_replace_name_with_id(self) -> None: - with open( - 
(path_to_folder_with_test_sboms + "example_list_with_license_names.json"), - "r", - encoding="utf-8-sig", - ) as my_file: - list_of_license_names = json.load(my_file) - with open( - (path_to_folder_with_test_sboms + "bom_licenses_changed.json"), - "r", - encoding="utf-8-sig", - ) as my_file: - sbom = json.load(my_file) - with open( - (path_to_folder_with_test_sboms + "bom_licenses_changed_with_id.json"), - "r", - encoding="utf-8-sig", - ) as my_file: - sbom_with_id = json.load(my_file) - process_license.process_license( - sbom["metadata"]["component"], list_of_license_names - ) - for component in sbom["components"]: - process_license.process_license(component, list_of_license_names) - self.assertTrue(compare_sboms(sbom, sbom_with_id)) - - -class GetLicenseTextFromFile(unittest.TestCase): - def test_get_license_text_from_folder(self) -> None: - path_to_license_folder = "tests/auxiliary/licenses" - license_text = process_license.get_license_text_from_folder( - "license_name", path_to_license_folder - ) - self.assertEqual(license_text, "The text describing a license.") +class AddLicenseTextTestCase(AmendTestCase): + def setUp(self) -> None: + super().setUp() + license_dir = Path("tests/auxiliary/licenses") + self.operation = AddLicenseText(license_dir) + self.operation.prepare(self.sbom_fixture) - def test_process_license_replace_license_text(self) -> None: - path_to_license_folder = "tests/auxiliary/licenses" - with open( - (path_to_folder_with_test_sboms + "example_list_with_license_names.json"), - "r", - encoding="utf-8-sig", - ) as my_file: - list_of_license_names = json.load(my_file) + def test_add_text(self): component = { - "type": "library", - "bom-ref": "pkg:nuget/some name@1.3.3", - "publisher": "some publisher", - "name": "some name", - "version": "1.3.2", - "cpe": "", - "description": "some description", - "scope": "required", - "hashes": [{"alg": "SHA-512", "content": "5F6996E38A31861449A493B938"}], "licenses": [ - {"license": {"name": "license_name", 
"text": {"content": "other text"}}} - ], - "copyright": "Copyright 2000-2021 some name Contributors", - "purl": "pkg:nuget/some name@1.3.2", + {"license": {"id": "Apache-1.0"}}, + {"license": {"name": "license_name"}}, + ] } - process_license.process_license( - component, list_of_license_names, path_to_license_folder - ) - self.assertEqual( - component["licenses"][0]["license"]["text"]["content"], # type: ignore - "The text describing a license.", - ) - - def test_process_license_add_license_text(self) -> None: - path_to_license_folder = "tests/auxiliary/licenses" - with open( - (path_to_folder_with_test_sboms + "example_list_with_license_names.json"), - "r", - encoding="utf-8-sig", - ) as my_file: - list_of_license_names = json.load(my_file) - component = { - "type": "library", - "bom-ref": "pkg:nuget/some name@1.3.3", - "publisher": "some publisher", - "name": "some name", - "version": "1.3.2", - "cpe": "", - "description": "some description", - "scope": "required", - "hashes": [{"alg": "SHA-512", "content": "5F6996E38A31861449A493B938"}], + expected = { "licenses": [ + {"license": {"id": "Apache-1.0"}}, { "license": { "name": "license_name", + "text": {"content": "The text describing a license."}, } - } - ], - "copyright": "Copyright 2000-2021 some name Contributors", - "purl": "pkg:nuget/some name@1.3.2", + }, + ] } - process_license.process_license( - component, list_of_license_names, path_to_license_folder - ) - self.assertEqual( - component["licenses"][0]["license"]["text"]["content"], # type: ignore - "The text describing a license.", - ) + self.operation.handle_component(component) + self.assertDictEqual(component, expected) - def test_process_license_add_license_text_with_space(self) -> None: - path_to_license_folder = "tests/auxiliary/licenses" - with open( - (path_to_folder_with_test_sboms + "example_list_with_license_names.json"), - "r", - encoding="utf-8-sig", - ) as my_file: - list_of_license_names = json.load(my_file) + def 
test_keep_existing_text(self): component = { - "type": "library", - "bom-ref": "pkg:nuget/some name@1.3.3", - "publisher": "some publisher", - "name": "some name", - "version": "1.3.2", - "cpe": "", - "description": "some description", - "scope": "required", - "hashes": [{"alg": "SHA-512", "content": "5F6996E38A31861449A493B938"}], "licenses": [ - { - "license": { - "name": "another license", - } - } - ], - "copyright": "Copyright 2000-2021 some name Contributors", - "purl": "pkg:nuget/some name@1.3.2", + {"license": {"name": "license_name", "text": {"content": "My text."}}}, + ] } - process_license.process_license( - component, list_of_license_names, path_to_license_folder - ) - self.assertEqual( - component["licenses"][0]["license"]["text"]["content"], # type: ignore - "The text describing another license.", - ) - - def test_error_messages_does_not_exist(self) -> None: - path_to_license_folder = "thispathdoesnotexist" - with self.assertRaises(AppError) as ae: - process_license.get_license_text_from_folder( - "license_name", path_to_license_folder - ) - self.assertIn( - "The submitted path thispathdoesnotexist does not exist.", - ae.exception.details.description, - ) - - def test_error_messages_not_a_folder(self) -> None: - path_to_license_folder = "tests/test_amend.py" - with self.assertRaises(AppError) as ae: - process_license.get_license_text_from_folder( - "license_name", path_to_license_folder - ) - self.assertIn( - "The submitted path (tests/test_amend.py) does not lead to a folder.", - ae.exception.details.description, - ) - + expected = copy.deepcopy(component) + self.operation.handle_component(component) + self.assertDictEqual(component, expected) -class TestDeleteUnknownComponent(AmendTestCase): - def test_delete_unknown_component(self) -> None: - licenses = [ - {"license": {"name": "unknown.something"}}, - {"license": {"name": "whateverUnknown"}}, - {"license": {"name": "unKnowN etc"}}, - {"license": {"name": "AunknowNM", "text": {"content": ""}}}, - 
{"license": {"name": "unknown", "text": {"content": "license text"}}}, - {"license": {"name": "some license", "text": {"content": "license text"}}}, - {"license": {"name": "some license"}}, - {"license": {"name": ""}}, - ] - component = {"licenses": copy.deepcopy(licenses)} - process_license.delete_license_unknown(component) - self.assertEqual(component["licenses"], licenses[4:]) - - def test_amend_delete_license_unknown(self) -> None: - sbom = { - "metadata": { - "component": { - "bom-ref": "a bom-ref", - "licenses": [ - {"license": {"name": "some_license"}}, - {"license": {"name": "license"}}, - {"license": {"name": "unknown", "text": {"content": ""}}}, - { - "license": { - "name": "unknown", - "text": {"content": "some text"}, - } - }, - ], - }, - "authors": [{"name": "automated"}], - }, - "components": [ - { - "bom-ref": "a second bom-ref", - "licenses": [ - {"license": {"name": "some_license"}}, - {"license": {"name": "license"}}, - ], - }, - { - "bom-ref": "a third bom-ref", - "licenses": [ - {"license": {"name": "some_unknown_license"}}, - {"license": {"name": "license"}}, - { - "license": { - "name": "unknown", - "text": {"content": "some description"}, - } - }, - ], - }, - {"name": "test", "bom-ref": "reference"}, - ], - "compositions": [ - { - "aggregate": "incomplete", - "assemblies": ["a bom-ref", "a second bom-ref", "a third bom-ref"], - } - ], + def test_file_extension_ignored(self): + component = { + "licenses": [ + {"license": {"name": "another license"}}, + {"license": {"name": "license_name"}}, + ] } - sbom_changed = { - "metadata": { - "component": { - "bom-ref": "a bom-ref", - "licenses": [ - {"license": {"name": "some_license"}}, - {"license": {"name": "license"}}, - { - "license": { - "name": "unknown", - "text": {"content": "some text"}, - } - }, - ], - }, - "authors": [{"name": "automated"}], - }, - "components": [ + expected = { + "licenses": [ { - "bom-ref": "a second bom-ref", - "licenses": [ - {"license": {"name": "some_license"}}, - 
{"license": {"name": "license"}}, - ], + "license": { + "name": "another license", + "text": {"content": "The text describing another license."}, + } }, { - "bom-ref": "a third bom-ref", - "licenses": [ - {"license": {"name": "license"}}, - { - "license": { - "name": "unknown", - "text": {"content": "some description"}, - } - }, - ], + "license": { + "name": "license_name", + "text": {"content": "The text describing a license."}, + } }, - {"name": "test", "bom-ref": "reference"}, - ], - "compositions": [ - { - "aggregate": "incomplete", - "assemblies": [ - "a bom-ref", - "a second bom-ref", - "a third bom-ref", - "reference", - ], - } - ], + ] } - run_amend(sbom) - self.assertEqual(sbom, sbom_changed) - - def test_amend_delete_single_license(self) -> None: - sbom = { - "metadata": { - "component": { - "bom-ref": "a bom-ref", - "licenses": [ - {"license": {"name": "unknown", "text": {"content": ""}}}, - ], - }, - "authors": [{"name": "automated"}], - }, - "components": [ - { - "bom-ref": "a second bom-ref", - "licenses": [ - {"license": {"name": "some_license"}}, - {"license": {"name": "license"}}, - ], - }, - { - "bom-ref": "a third bom-ref", - "licenses": [ - {"license": {"name": "some_unknown_license"}}, - ], - }, - ], - "compositions": [ - { - "aggregate": "incomplete", - "assemblies": ["a bom-ref", "a second bom-ref", "a third bom-ref"], - } - ], + self.operation.handle_component(component) + self.assertDictEqual(component, expected) + + def test_name_is_case_insensitive(self): + component = { + "licenses": [ + {"license": {"name": "ANOTHER license"}}, + {"license": {"name": "uppercase"}}, + ] } - sbom_changed = { - "metadata": { - "component": { - "bom-ref": "a bom-ref", - }, - "authors": [{"name": "automated"}], - }, - "components": [ + expected = { + "licenses": [ { - "bom-ref": "a second bom-ref", - "licenses": [ - {"license": {"name": "some_license"}}, - {"license": {"name": "license"}}, - ], + "license": { + "name": "ANOTHER license", + "text": 
{"content": "The text describing another license."}, + } }, { - "bom-ref": "a third bom-ref", + "license": { + "name": "uppercase", + "text": {"content": "UPPERCASE LICENSE"}, + } }, - ], - "compositions": [ - { - "aggregate": "incomplete", - "assemblies": ["a bom-ref", "a second bom-ref", "a third bom-ref"], - } - ], + ] } - run_amend(sbom) - self.assertEqual(sbom, sbom_changed) + self.operation.handle_component(component) + self.assertDictEqual(component, expected) + + def test_invalid_license_dir_raises(self): + operation = AddLicenseText(Path("somethingthatsurelydoesntexist")) + with self.assertRaises(AppError): + operation.prepare(self.sbom_fixture) -class TestInferCopyright(AmendTestCase): +class InferCopyrightTestCase(AmendTestCase): def setUp(self) -> None: super().setUp() self.operation = InferCopyright() @@ -772,7 +424,7 @@ def test_create_copyright(self) -> None: def test_set_copyright_from_supplier_in_metadata(self) -> None: year = datetime.date.today().year self.sbom_fixture["metadata"]["component"]["supplier"] = {"name": "Acme Inc."} - run_amend(self.sbom_fixture) + run_amend(self.sbom_fixture, selected=[InferCopyright]) self.assertEqual( self.sbom_fixture["metadata"]["component"]["copyright"], f"Copyright (c) {year} Acme Inc.", @@ -783,7 +435,7 @@ def test_set_copyright_from_supplier_in_components(self) -> None: self.sbom_fixture["components"][0].pop("externalReferences") self.sbom_fixture["components"][0]["supplier"] = {"name": "Acme Inc."} year = datetime.date.today().year - run_amend(self.sbom_fixture) + run_amend(self.sbom_fixture, selected=[InferCopyright]) company = self.sbom_fixture["components"][0]["supplier"]["name"] self.assertEqual( self.sbom_fixture["components"][0]["copyright"], @@ -791,5 +443,87 @@ def test_set_copyright_from_supplier_in_components(self) -> None: ) +class DeleteAmbiguousLicensesTestCase(AmendTestCase): + def setUp(self): + super().setUp() + self.operation = DeleteAmbiguousLicenses() + self.component = 
self.sbom_fixture["components"][0] + + def test_delete_one_license_in_set(self): + self.component["licenses"] = [ + {"license": {"id": "Apache-2.0"}}, + {"license": {"name": "Some license"}}, + ] + expected = copy.deepcopy(self.component) + expected["licenses"] = [{"license": {"id": "Apache-2.0"}}] + + self.operation.handle_component(self.component) + self.assertDictEqual(self.component, expected) + + def test_delete_sole_license(self): + self.component["licenses"] = [ + {"license": {"name": "Some license"}}, + ] + expected = copy.deepcopy(self.component) + del expected["licenses"] + + self.operation.handle_component(self.component) + self.assertDictEqual(self.component, expected) + + def test_delete_multiple_licenses(self): + self.component["licenses"] = [ + {"license": {"name": "Some license"}}, + {"license": {"id": "Apache-2.0"}}, + { + "license": { + "name": "License with text", + "text": {"content": "Full text"}, + } + }, + {"license": {"name": "Foo license"}}, + {"license": {"name": "Bar license"}}, + ] + expected = copy.deepcopy(self.component) + expected["licenses"] = [ + {"license": {"id": "Apache-2.0"}}, + { + "license": { + "name": "License with text", + "text": {"content": "Full text"}, + } + }, + ] + + self.operation.handle_component(self.component) + self.assertDictEqual(self.component, expected) + + def test_dont_delete_id(self): + self.component["licenses"] = [ + {"license": {"id": "Apache-2.0"}}, + ] + expected = copy.deepcopy(self.component) + + self.operation.handle_component(self.component) + self.assertDictEqual(self.component, expected) + + def test_dont_delete_expression(self): + self.component["licenses"] = [ + {"expression": "Apache-2.0 AND (MIT OR GPL-2.0-only)"}, + ] + expected = copy.deepcopy(self.component) + + self.operation.handle_component(self.component) + self.assertDictEqual(self.component, expected) + + def test_dont_delete_name_with_text(self): + self.component["licenses"] = [ + {"license": {"name": "Some license", "text": 
{"content": "Full text"}}}, + ] + expected = copy.deepcopy(self.component) + + self.operation.handle_component(self.component) + self.assertDictEqual(self.component, expected) + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_main.py b/tests/test_main.py index d460a8de..8f3be3db 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -99,8 +99,28 @@ def test_get_amend_license_from_folder(self, mock_read: unittest.mock.Mock) -> N [ "", "amend", + "--operation", + "license-name-to-id", + "fake_bom.cdx.json", + str(("--license-dir=" + path.as_posix() + "/auxiliary/licenses")), + ], + ): + mock_read.return_value = ({}, "json") + result = main() + self.assertEqual(result, _STATUS_OK) + + @unittest.mock.patch("cdxev.__main__.read_sbom") + def test_operation_selection(self, mock_read: unittest.mock.Mock) -> None: + with unittest.mock.patch( + "sys.argv", + [ + "", + "amend", + "--operation", + "add-bom-ref", + "--operation", + "infer-copyright", "fake_bom.cdx.json", - str(("--license-path=" + path.as_posix() + "/auxiliary/licenses")), ], ): mock_read.return_value = ({}, "json")