Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support cargo workspaces #3602

Merged
merged 6 commits into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 99 additions & 27 deletions src/packagedcode/cargo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# See https://aboutcode.org for more information about nexB OSS projects.
#

import os
import re

import saneyaml
Expand All @@ -20,7 +21,81 @@
"""


class CargoTomlHandler(models.DatafileHandler):
class CargoBaseHandler(models.DatafileHandler):
    """
    Base handler shared by the Cargo.toml and Cargo.lock handlers. It holds
    the common assembly logic, including support for cargo workspaces.
    """

    @classmethod
    def assemble(cls, package_data, resource, codebase, package_adder):
        """
        Assemble Cargo.toml and possible Cargo.lock datafiles. Also
        support cargo workspaces where we have multiple packages from
        a repository and some shared information present at top-level.

        ``package_data`` is the parsed data of the datafile at ``resource``.
        When its ``extra_data`` holds a "workspace" mapping with both
        "members" and "package" entries, the datafiles found directly under
        each member directory are updated with the workspace package data
        and each member directory is then assembled on its own. Otherwise
        the directory containing ``resource`` is assembled.

        Yield whatever ``assemble_from_many_datafiles`` yields.
        """
        datafile_name_patterns = ('Cargo.toml', 'cargo.toml', 'Cargo.lock', 'cargo.lock')

        workspace = package_data.extra_data.get("workspace", {})
        workspace_members = workspace.get("members", [])
        # NOTE: this is the mapping stored in ``package_data.extra_data``, so
        # the license attributes copied below are added to that stored data.
        workspace_package_data = workspace.get("package", {})

        # Carry the license detection results of the workspace datafile over
        # to the shared data so that members reuse them without re-detection.
        attributes_to_copy = [
            "license_detections",
            "declared_license_expression",
            "declared_license_expression_spdx"
        ]
        if "license" in workspace_package_data:
            for attribute in attributes_to_copy:
                workspace_package_data[attribute] = getattr(package_data, attribute)

        root_directory = resource.parent(codebase)

        if not (workspace_package_data and workspace_members):
            # Not a workspace (or nothing to share): plain assembly.
            yield from cls.assemble_from_many_datafiles(
                datafile_name_patterns=datafile_name_patterns,
                directory=root_directory,
                codebase=codebase,
                package_adder=package_adder,
            )
            return

        workspace_root_path = root_directory.path
        for workspace_member_path in workspace_members:
            # NOTE(review): Cargo allows glob patterns in workspace members;
            # such entries are looked up as literal paths here and are
            # skipped when no resource matches.
            workspace_directory_path = os.path.join(workspace_root_path, workspace_member_path)
            workspace_directory = codebase.get_resource(path=workspace_directory_path)
            if not workspace_directory:
                continue

            # Update the package data for all members with the
            # workspace package data
            for member_resource in workspace_directory.children(codebase):
                if not cls.is_datafile(location=member_resource.location):
                    continue
                if not member_resource.package_data:
                    continue

                updated_package_data = cls.update_resource_package_data(
                    package_data=workspace_package_data,
                    old_package_data=member_resource.package_data.pop(),
                    mapping=CARGO_ATTRIBUTE_MAPPING,
                )
                member_resource.package_data.append(updated_package_data)
                member_resource.save(codebase)

            yield from cls.assemble_from_many_datafiles(
                datafile_name_patterns=datafile_name_patterns,
                directory=workspace_directory,
                codebase=codebase,
                package_adder=package_adder,
            )

    @classmethod
    def update_resource_package_data(cls, package_data, old_package_data, mapping=None):
        """
        Update the ``old_package_data`` mapping in place with values from the
        workspace ``package_data`` mapping and return it.

        ``mapping`` maps keys of ``old_package_data`` to keys of
        ``package_data``; it defaults to ``CARGO_ATTRIBUTE_MAPPING`` when
        None. A mapped key is replaced only when the workspace data defines
        the corresponding key, and "parties" is rebuilt only when the
        workspace data has "authors", so that values a member defines itself
        are not erased by missing workspace values.
        """
        if mapping is None:
            mapping = CARGO_ATTRIBUTE_MAPPING

        for attribute in old_package_data:
            if attribute in mapping:
                workspace_attribute = mapping[attribute]
                if workspace_attribute in package_data:
                    old_package_data[attribute] = package_data[workspace_attribute]
            elif attribute == "parties":
                authors = package_data.get("authors")
                if authors:
                    old_package_data[attribute] = list(get_parties(
                        person_names=authors,
                        party_role='author',
                    ))

        return old_package_data



class CargoTomlHandler(CargoBaseHandler):
datasource_id = 'cargo_toml'
path_patterns = ('*/Cargo.toml', '*/cargo.toml',)
default_package_type = 'cargo'
Expand All @@ -31,11 +106,16 @@ class CargoTomlHandler(models.DatafileHandler):
@classmethod
def parse(cls, location):
package_data = toml.load(location, _dict=dict)

core_package_data = package_data.get('package', {})
workspace = package_data.get('workspace', {})
extra_data = {}

name = core_package_data.get('name')
version = core_package_data.get('version')
if isinstance(version, dict) and "workspace" in version:
version = None
extra_data["version"] = "workspace"

description = core_package_data.get('description') or ''
description = description.strip()

Expand Down Expand Up @@ -66,6 +146,8 @@ def parse(cls, location):
repository_homepage_url = name and f'https://crates.io/crates/{name}'
repository_download_url = name and version and f'https://crates.io/api/v1/crates/{name}/{version}/download'
api_data_url = name and f'https://crates.io/api/v1/crates/{name}'
if workspace:
extra_data["workspace"] = workspace

yield models.PackageData(
datasource_id=cls.datasource_id,
Expand All @@ -82,22 +164,24 @@ def parse(cls, location):
repository_download_url=repository_download_url,
api_data_url=api_data_url,
dependencies=dependencies,
extra_data=extra_data,
)

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
"""
Assemble Cargo.toml and possible Cargo.lock datafiles
"""
yield from cls.assemble_from_many_datafiles(
datafile_name_patterns=('Cargo.toml', 'cargo.toml', 'Cargo.lock', 'cargo.lock'),
directory=resource.parent(codebase),
codebase=codebase,
package_adder=package_adder,
)

# Mapping used when workspace-level package data is applied to the package
# data of a workspace member: each key is looked up in the member package
# data and replaced by the workspace value stored under the mapped name.
CARGO_ATTRIBUTE_MAPPING = {
# Fields in PackageData model: Fields in cargo
"homepage_url": "homepage",
"vcs_url": "repository",
"keywords": "categories",
"extracted_license_statement": "license",
# These are fields carried over to avoid re-detection of licenses
# (they map to themselves because they are copied under the same names
# onto the workspace package data before members are updated)
"license_detections": "license_detections",
"declared_license_expression": "declared_license_expression",
"declared_license_expression_spdx": "declared_license_expression_spdx",
}

AyanSinhaMahapatra marked this conversation as resolved.
Show resolved Hide resolved

class CargoLockHandler(models.DatafileHandler):
class CargoLockHandler(CargoBaseHandler):
datasource_id = 'cargo_lock'
path_patterns = ('*/Cargo.lock', '*/cargo.lock',)
default_package_type = 'cargo'
Expand Down Expand Up @@ -144,18 +228,6 @@ def parse(cls, location):
dependencies=dependencies,
)

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
"""
Assemble Cargo.toml and possible Cargo.lock datafiles
"""
yield from cls.assemble_from_many_datafiles(
datafile_name_patterns=('Cargo.toml', 'Cargo.lock',),
directory=resource.parent(codebase),
codebase=codebase,
package_adder=package_adder,
)


def dependency_mapper(dependencies, scope='dependencies'):
"""
Expand Down Expand Up @@ -197,7 +269,7 @@ def get_parties(person_names, party_role):
name=name,
role=party_role,
email=email,
)
).to_dict()


person_parser = re.compile(
Expand Down
8 changes: 7 additions & 1 deletion src/packagedcode/licensing.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,12 @@ def get_normalized_license_detections(
if detections:
license_detections.extend(detections)

if not license_detections:
unknown_dict_object = repr(dict(extracted_license.items()))
unknown_detection = get_unknown_license_detection(query_string=unknown_dict_object)
license_detections.append(unknown_detection)
if TRACE:
logger_debug(f'get_normalized_license_detections: dict: unknown_dict_object: {unknown_dict_object}, unknown_detection: {saneyaml.dump(unknown_detection.to_dict())}')
else:
extracted_license_statement = saneyaml.dump(extracted_license)
license_detections = get_license_detections_for_extracted_license_statement(
Expand Down Expand Up @@ -753,7 +759,6 @@ def get_normalized_license_detections(

else:
extracted_license_statement = saneyaml.dump(extracted_license_item)

detections = get_license_detections_for_extracted_license_statement(
extracted_license_statement=extracted_license_statement,
try_as_expression=try_as_expression,
Expand Down Expand Up @@ -819,6 +824,7 @@ def get_license_detections_and_expression(
if not license_detections:
if not isinstance(extracted_license_statement, str):
extracted_license_statement = saneyaml.dump(extracted_license_statement)

license_detection = get_unknown_license_detection(query_string=extracted_license_statement)
license_detections = [license_detection]

Expand Down
5 changes: 4 additions & 1 deletion src/packagedcode/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -782,7 +782,10 @@ def populate_license_fields(self):
)

if self.extracted_license_statement and not isinstance(self.extracted_license_statement, str):
self.extracted_license_statement = saneyaml.dump(self.extracted_license_statement)
if isinstance(self.extracted_license_statement, dict):
self.extracted_license_statement = saneyaml.dump(dict(self.extracted_license_statement.items()))
else:
self.extracted_license_statement = saneyaml.dump(self.extracted_license_statement)

def update_purl_fields(self, package_data, replace=False):

Expand Down
2 changes: 2 additions & 0 deletions src/packagedcode/plugin_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,8 @@ def get_package_and_deps(codebase, package_adder=add_to_package, strip_root=Fals
for dfp in item.datafile_paths
]
packages.append(item)
if TRACE:
logger_debug(' get_package_and_deps: Package:', item.purl)

elif isinstance(item, Dependency):
if strip_root and not has_single_resource:
Expand Down
Loading
Loading