diff --git a/cdxev/__main__.py b/cdxev/__main__.py
index c47c857e..96d0bb26 100644
--- a/cdxev/__main__.py
+++ b/cdxev/__main__.py
@@ -5,16 +5,16 @@
import inspect
import json
import logging
-import os
import re
import shutil
import sys
import textwrap
from dataclasses import dataclass
from pathlib import Path
-from typing import TYPE_CHECKING, List, NoReturn, Optional, Tuple
+from typing import TYPE_CHECKING, NoReturn, Optional, Tuple
import docstring_parser
+from natsort import os_sorted
import cdxev.amend.command as amend
import cdxev.set
@@ -379,7 +379,7 @@ def create_merge_parser(
"input",
metavar="",
help="Paths to SBOM files to merge. You must specify at least two paths.",
- nargs="+",
+ nargs="*",
type=Path,
)
parser.add_argument(
@@ -705,37 +705,39 @@ def invoke_amend(args: argparse.Namespace) -> int:
def invoke_merge(args: argparse.Namespace) -> int:
- if len(args.input) < 2 and args.from_folder is None:
+ global logger
+
+ inputs = args.input
+
+ if args.from_folder is not None:
+ if not args.from_folder.is_dir():
+ usage_error(
+ "Path not found or is not a directory: " + str(args.from_folder),
+ args.parser,
+ )
+
+ # Find all SBOMs in source folder (filenames: bom.json or *.cdx.json)
+ folder_inputs: list[Path] = list(args.from_folder.glob("*.cdx.json"))
+ if (args.from_folder / "bom.json").is_file():
+ folder_inputs.append(args.from_folder / "bom.json")
+
+ # Remove any paths which have already been provided as an explicit input
+ folder_inputs = os_sorted(p for p in folder_inputs if p not in args.input)
+
+ if len(folder_inputs) == 0:
+ logger.warning(f"No additional SBOMs found in folder: {args.from_folder}")
+
+ for input in folder_inputs:
+ logger.debug(f"Found in folder: {input}")
+
+ inputs += folder_inputs
+
+ if len(inputs) < 2:
usage_error(
- "At least two input files, or a input file"
- " and an folder path must be specified.",
- args.parser,
+ f"Not enough inputs. Must be at least 2, you have provided {len(inputs)}."
)
- inputs: List[dict] = []
- for input in args.input:
- sbom, _ = read_sbom(input)
- inputs.append(sbom)
- if args.from_folder is not None:
- if not os.path.exists(args.from_folder):
- usage_error("Path to folder does not exist", args.parser)
- path_to_folder = args.from_folder
- name_governing_sbom = os.path.basename(os.path.normpath(args.input[0]))
- list_folder_content = os.listdir(path_to_folder)
- # use python sorted function to sort the names of the files, the
- # names are compared in lowercase to adhere to alphabetical order
- list_folder_content_sorted = sorted(list_folder_content, key=str.lower)
-
- for file_name in list_folder_content_sorted:
- if (
- re.search(r"^bom\.json$|.*\.cdx\.json", file_name) # noqa: W605
- and file_name != name_governing_sbom
- ):
- print(file_name)
- new_sbom, _ = read_sbom(Path(os.path.join(path_to_folder, file_name)))
- inputs.append(new_sbom)
- if len(inputs) == 1:
- usage_error("Provided folder does not contain any SBOM files", args.parser)
+ inputs = [sbom for (sbom, _) in (read_sbom(input) for input in inputs)]
output = merge(inputs)
write_sbom(output, args.output)
return Status.OK
diff --git a/docs/available_commands.md b/docs/available_commands.md
index 5dd997f1..005c8be5 100644
--- a/docs/available_commands.md
+++ b/docs/available_commands.md
@@ -161,28 +161,24 @@ The following schema is a little more involved. It will delete any component who
## merge
-This command requires at least two input files, but can accept an arbitrary number.
+This command requires at least two input files, but can accept an arbitrary number. Inputs can either be specified directly as positional arguments on the command-line or using the `--from-folder` option. Files specified as arguments are merged in the order they are given; files in the folder are merged in alphabetical order (see note below).
+If both positional arguments and the `--from-folder` option are used, then the positional arguments are merged first, followed by the files in the folder. The command will not merge the same file twice if it is specified on the command-line and is also part of the folder.
-Alternatively only one file can be submitted and the command `--from-folder` must be used to provide the path to a folder.
-This command reads the contents of the provided folder and loads *all files* with "*.cdx.json" or the name "bom.json", according to the naming convention described in the [CycloneDX Specification](https://cyclonedx.org/specification/overview/#recognized-file-patterns).
-If a file in the folder has the same name as the provided sbom to be merged in, it will be skipped.
-The files are then merged in alphabetical order into the regularly provided sbom in this order.
+When using the `--from-folder` option, the program looks for files matching either of the [recommended CycloneDX naming schemes](https://cyclonedx.org/specification/overview/#recognized-file-patterns): `bom.json` or `*.cdx.json`.
-The process runs iterative, merging two SBOMs in each step.
-In the first step, the second submitted SBOM is merged into the first.
-In the second step the third would be merged into the resulting SBOM from step one etc.
+__Note on merge order:__
+Input files in the folder provided to the `--from-folder` option are sorted in a platform-specific way. In other words, they are merged in the same order they appear in your operating system's file browser (e.g., Windows Explorer).
-The Resulting SBOM will contain the Metadata from the first SBOM submitted, with only the timestamp being updated.
+The process runs iteratively, merging two SBOMs in each iteration. In the first round, the second submitted SBOM is merged into the first. In the second round the third would be merged into the result of the first round and so on.
+In mathematical terms: `output = (((input_1 x input_2) x input_3) x input_4 ...)`
-The components from the first SBOM submitted will be kept unchanged, if the SBOMs that are merged contain new components,
-those will be added to the list of components. Should a component be contained in several SBOMs, the one from the SBOM that was merged earlier will be taken without any consideration. If this happens and a component is dropped during the merge, a warning will be shown.
-Uniqueness of the bom-refs will be ensured.
+A few notes on the merge algorithm:
-The dependencies for new components are taken over.
-If components are contained in both SBOMs, then the dependsON lists
-for them will be merged so that no information will be lost.
-
-If a VEX section is contained, it will be merged as well, for details see merge-vex section
+* The `metadata` field is always retained from the first input and never changed through a merge with the exception of the `timestamp`.
+* Components are merged into the result in the order they __first__ appear in the inputs. If any subsequent input specifies the same component (sameness in this case being defined as having identical identifying attributes such as `name`, `version`, `purl`, etc.), the later instance of the component will be dropped with a warning. __This command cannot be used to merge information inside components.__
+* The resulting dependency graph will reflect all dependencies from all inputs. Dependencies from later inputs are always added to the result, even if the component is dropped as a duplicate as described above.
+* Uniqueness of *bom-refs* will be ensured.
+* If the inputs contain VEX information in the form of a `vulnerabilities` field, this will be merged as well. For details, see the section on the `merge-vex` command.
## merge-vex
@@ -266,7 +262,7 @@ The *value* must be given as a valid JSON value. That means command-line usage c
cdx-ev set bom.json --cpe --key copyright --value '"2022 Acme Inc"'
# Set the copyright for all versions of the given component
- cdx-ev set bom.json --group=org.acme --name=my_program --version-range vers:generic/* --key copyright --value '"Copyright 2024 Acme"'
+ cdx-ev set bom.json --group=org.acme --name=my_program --version-range vers:generic/* --key copyright --value '"Copyright 2024 Acme"'
### Conflicts
@@ -321,7 +317,7 @@ When passing the targets, names and values in a file, the file must conform to t
Example for the use of version ranges:
[
- {
+ {
"id": {
"name": "web-framework",
"group": "org.acme",
diff --git a/poetry.lock b/poetry.lock
index 4bf7517d..c0a5c3b5 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
[[package]]
name = "arrow"
@@ -608,6 +608,21 @@ files = [
{file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
]
+[[package]]
+name = "natsort"
+version = "8.4.0"
+description = "Simple yet flexible natural sorting in Python."
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "natsort-8.4.0-py3-none-any.whl", hash = "sha256:4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c"},
+ {file = "natsort-8.4.0.tar.gz", hash = "sha256:45312c4a0e5507593da193dedd04abb1469253b601ecaf63445ad80f0a1ea581"},
+]
+
+[package.extras]
+fast = ["fastnumbers (>=2.0.0)"]
+icu = ["PyICU (>=1.0.0)"]
+
[[package]]
name = "nodeenv"
version = "1.9.1"
@@ -751,6 +766,16 @@ files = [
[package.extras]
windows-terminal = ["colorama (>=0.4.6)"]
+[[package]]
+name = "pyicu"
+version = "2.13.1"
+description = "Python extension wrapping the ICU C++ API"
+optional = false
+python-versions = "*"
+files = [
+ {file = "PyICU-2.13.1.tar.gz", hash = "sha256:d4919085eaa07da12bade8ee721e7bbf7ade0151ca0f82946a26c8f4b98cdceb"},
+]
+
[[package]]
name = "pytest"
version = "8.2.2"
@@ -1195,4 +1220,4 @@ tests = ["coverage[toml]"]
[metadata]
lock-version = "2.0"
python-versions = "^3.9.0"
-content-hash = "9ef780956ee21e5e82f90221a5769125c0b6892688b8a15088088501683dd577"
+content-hash = "f4aa949c4ac8bc32620558558fab6ae4ea57b064e0434379bb053ae0bdbfd076"
diff --git a/pyproject.toml b/pyproject.toml
index 533a7211..9f54f099 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,6 +26,11 @@ python-dateutil = "2.9.0.post0"
jsonschema = {version = "4.22.0", extras = ["format"]}
docstring-parser = "^0.16"
charset-normalizer = "^3.3.2"
+pyicu = [
+ {version = "^2.13.1", platform = "darwin"},
+ {version = "^2.13.1", platform = "linux"}
+ ]
+natsort = "^8.4.0"
univers = "30.11.0"
[tool.poetry.group.dev.dependencies]