Skip to content

Commit

Permalink
Use puurl for package identification #805 and #275
Browse files Browse the repository at this point in the history
Signed-off-by: Philippe Ombredanne <[email protected]>
  • Loading branch information
pombredanne committed Feb 27, 2018
1 parent df33260 commit 9ebe47a
Show file tree
Hide file tree
Showing 23 changed files with 335 additions and 685 deletions.
7 changes: 0 additions & 7 deletions src/packagedcode/gemfile_lock.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,13 +200,6 @@ def flatten(self):
flattened.extend(gem.flatten())
return sorted(set(flattened))

def flatten_urn(self):
"""
Return a flattened list of parent(urn,gem_name),
child(urn,gem_name) dependencies from self.
"""
return [(p.urn, p.gem_name, c.urn, c.gem_name,) for p, c in self.flatten()]

def dependency_tree(self):
"""
Return a tree of dependencies as nested mappings.
Expand Down
2 changes: 1 addition & 1 deletion src/packagedcode/maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,7 @@ def parse(location=None, text=None, check_is_pom=True, extra_properties=None):
# if packaging and packaging != 'jar':
# qualifiers['packaging'] = packaging

dep_id = models.PackageIdentifier(
dep_id = models.PackageUniversalURL(
type='maven',
namespace=dgroup_id,
name=dartifact_id,
Expand Down
51 changes: 27 additions & 24 deletions src/packagedcode/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,15 +166,18 @@ def __init__(self, field, **kwargs):
super(BaseListType, self).__init__(field=field, **kwargs)


class PackageIdentifier(
namedtuple('PackageIdentifier',
class PackageUniversalURL(
namedtuple('PackageUniversalURL',
'type namespace name version qualifiers path')):
"""
A "mostly universal" Package identifier tuple.
A puurl is a "mostly universal" Package Universal URL.
This is either
- a URL string as in:
`type://namespace/name@version?qualifiers#path`
- a set of string fields:
`type:namespace/name@version?qualifiers#path`
For example:
maven:org.apache.commons/[email protected]
- type: optional. The type of package as maven, npm, rpm.
- namespace: optional. Some namespace prefix, slash-separated
such as an NPM scope, a GitHub user or org, a Debian distro
Expand Down Expand Up @@ -202,19 +205,19 @@ def __new__(self, type=None, namespace=None, name=None,
if key == 'qualifiers':
if qualifiers and not isinstance(qualifiers, dict):
raise ValueError(
"Invalid PackageIdentifier: 'qualifiers' "
"Invalid PackageUniversalURL: 'qualifiers' "
"must be a mapping: {}".format(repr(qualifiers)))
continue

if value and not isinstance(value, basestring):
raise ValueError(
'Invalid PackageIdentifier: '
'Invalid PackageUniversalURL: '
'{} must be a string: {}'.format(repr(name), repr(value)))

if key == 'name' and not name:
raise ValueError("Invalid PackageIdentifier: a 'name' is required.")
raise ValueError("Invalid PackageUniversalURL: a 'name' is required.")

return super(PackageIdentifier, self).__new__(PackageIdentifier,
return super(PackageUniversalURL, self).__new__(PackageUniversalURL,
type or None, namespace or None,
name,
version or None, qualifiers or None, path or None)
Expand All @@ -225,7 +228,7 @@ def __str__(self, *args, **kwargs):
def to_string(self):
"""
Return a compact ABC Package identifier URL in the form of
`type://namespace/name@version?qualifiers#path`
`type:namespace/name@version?qualifiers#path`
"""
identifier = []
if self.type:
Expand Down Expand Up @@ -259,7 +262,7 @@ def to_string(self):
@classmethod
def from_string(cls, package_id):
"""
Return a PackageIdentifier parsed from a string.
Return a PackageUniversalURL parsed from a string.
"""
if not package_id:
raise ValueError('package_id is required.')
Expand Down Expand Up @@ -300,7 +303,7 @@ def from_string(cls, package_id):
if not name:
raise ValueError('A package name is required: '.format(repr(package_id)))

return PackageIdentifier(type, namespace, name, version, qualifiers, path)
return PackageUniversalURL(type, namespace, name, version, qualifiers, path)


class BaseModel(Model):
Expand Down Expand Up @@ -366,15 +369,15 @@ class PackageRelationship(BaseModel):
label='relationship between two packages',
description='A directed relationship between two packages. '
'This consists of three attributes:'
'The "from" (or subject) package identifier in the relationship, '
'the "to" (or object) package identifier in the relationship, '
'The "from" (or subject) package "puurl" in the relationship, '
'the "to" (or object) package "puurl" in the relationship, '
'and the "relationship" (or predicate) string that specifies the relationship.'
)

from_pid = StringType()
from_pid.metadata = dict(
label='"From" package identifier in the relationship',
description='A compact ABC Package identifier URL in the form of '
from_puurl = StringType()
from_puurl.metadata = dict(
label='"From" package puurl in the relationship',
description='A compact Package Universal URL in the form of '
'type://namespace/name@version?qualifiers#path')

relationship = StringType()
Expand All @@ -384,18 +387,18 @@ class PackageRelationship(BaseModel):
'identifiers such as "source_of" when a package is the source '
'code package for another package')

to_pid = StringType()
to_pid.metadata = dict(
label='"To" package identifier in the relationship',
to_puurl = StringType()
to_puurl.metadata = dict(
label='"To" Package Universal URL in the relationship',
description='A compact ABC Package identifier URL in the form of '
'type://namespace/name@version?qualifiers#path')

class Options:
# this defines the important serialization order
fields_order = [
'from_pid',
'from_puurl',
'relationship',
'to_pid',
'to_puurl',
]

class BasePackage(BaseModel):
Expand Down Expand Up @@ -450,7 +453,7 @@ def identifier(self):
Return a compact ABC Package identifier URL in the form of
`type://namespace/name@version?qualifiers#path`
"""
pid = PackageIdentifier(
pid = PackageUniversalURL(
self.type, self.namespace, self.name, self.version,
self.qualifiers, self.path)
return str(pid)
Expand Down
6 changes: 3 additions & 3 deletions src/packagedcode/npm.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ def bundle_deps_mapper(bundle_deps, package):
continue

ns, name = split_scoped_package_name(bdep)
identifier = models.PackageIdentifier(
identifier = models.PackageUniversalURL(
type='npm', namespace=ns, name=name)

dep = models.DependentPackage(
Expand Down Expand Up @@ -442,15 +442,15 @@ def deps_mapper(deps, package, field_name):
for d in dependencies:
if d.scope != 'dependencies':
continue
pid = models.PackageIdentifier.from_string(d.identifier)
pid = models.PackageUniversalURL.from_string(d.identifier)
npm_name = pid.name
if pid.namespace:
npm_name = '/'.join([pid.namespace, pid.name])
deps_by_name[npm_name] = d

for fqname, requirement in deps.items():
ns, name = split_scoped_package_name(fqname)
identifier = models.PackageIdentifier(
identifier = models.PackageUniversalURL(
type='npm', namespace=ns, name=name).to_string()

# optionalDependencies override the dependencies with the same name
Expand Down
2 changes: 1 addition & 1 deletion src/packagedcode/phpcomposer.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def deps_mapper(deps, package, field_name):
for ns_name, requirement in deps.items():
ns, _, name = ns_name.rpartition('/')

did = models.PackageIdentifier(
did = models.PackageUniversalURL(
type='composer',
namespace=ns,
name=name
Expand Down
8 changes: 4 additions & 4 deletions src/packagedcode/rpm.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def parse(location):

related_packages = []
if infos.source_rpm:
identifier = models.PackageIdentifier(
identifier = models.PackageUniversalURL(
type='rpm',
name=name,
version=evr,
Expand All @@ -205,16 +205,16 @@ def parse(location):
if src_arch:
src_qualifiers['arch'] = src_arch

src_identifier = models.PackageIdentifier(
src_identifier = models.PackageUniversalURL(
type='rpm',
name=src_name,
version=src_evr,
qualifiers=src_qualifiers
).to_string()
if TRACE: logger_debug('parse: source_rpm', src_identifier)
related_packages = [models.PackageRelationship(
from_pid=src_identifier,
to_pid=identifier,
from_puurl=src_identifier,
to_puurl=identifier,
relationship='source_of')]

parties = []
Expand Down
66 changes: 31 additions & 35 deletions src/packagedcode/rubygems.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@

from commoncode import fileutils

# FIXME: filter out internal fields! such as installed_by_version
# TODO: check:
# https://github.com/hugomaiavieira/pygments-rspec
# https://github.com/tushortz/pygeminfo
Expand Down Expand Up @@ -68,7 +67,7 @@ def is_gem_file(location):
return location.endswith(('.gem', '.gemspec'))


def get_spec(gemfile, script_file=DUMPSPEC_SCRIPT_LOCATION, filter=False):
def get_spec(gemfile, script_file=DUMPSPEC_SCRIPT_LOCATION):
"""
Return a gemspecs mapping by calling a Ruby script invoking the
Rubygems native API or None.
Expand All @@ -92,8 +91,7 @@ def get_spec(gemfile, script_file=DUMPSPEC_SCRIPT_LOCATION, filter=False):
keys = raw_spec.keys()
logger.debug('\nRubygems spec keys for %(gemfile)r:\n%(keys)r' % locals())
spec.update(raw_spec)
if filter:
spec = filter_fields(spec)
spec = normalize(spec)
return spec


Expand Down Expand Up @@ -139,33 +137,45 @@ def get_specs(locations):


# Known gem fields; others are ignored.
known_fields = {
known_fields = [
'platform',
'name',
'version',
'homepage',
'summary',
'description',
'licenses',
'email',
'authors',
'date',
'requirements',
'dependencies',
'description',
'email',

# extra fields
'files',
'homepage',
'licenses',
'name',
'requirements',
'summary',
'version',
'test_files',
}
'extra_rdoc_files',

'rubygems_version',
'required_ruby_version',

'rubyforge_project',
'loaded_from',
'original_platform',
'new_platform',
'specification_version',
]


def filter_fields(gem):
def normalize(gem_data, known_fields=known_fields):
"""
Return a gem mapping filtering out any field that is not a known
field in a gem mapping.
field in a gem mapping. Ensure that all known fields are present
even if empty.
"""
spec_fields = set(gem.keys())
unsupported_fields = spec_fields.difference(known_fields)
for f in unsupported_fields:
del gem[f]
return gem
return OrderedDict(
[(k, gem_data.get(k) or None) for k in known_fields]
)


LICENSE_KEYS_MAPPING = {
Expand Down Expand Up @@ -215,20 +225,6 @@ class GemSpec(object):
Represent a Gem specification.
"""
# TODO: Check if we should use 'summary' instead of description
# Structure: {'gem_spec_field': 'collect_field'}

FIELD_MAPPING = {
'name': 'clean_name',
'version': 'version',
'homepage': 'clean_homepage_url',
'description': 'clean_description',
'licenses': 'clean_licenses',
'authors': 'clean_authors',
'email': 'clean_email',
'resource': 'clean_resource',
'copyright': 'clean_copyright',
}

def __init__(self, location):
"""
Initialize from the gem spec or gem file at location.
Expand Down
Loading

0 comments on commit 9ebe47a

Please sign in to comment.