Skip to content

Commit

Permalink
Update Package models types and tests #275
Browse files Browse the repository at this point in the history
 * Support PackageIdentifier class for #805 as Package property and as
   discrete type:namespace/name@version?qualifiers#path fields
 * Improved DependentPackage definitions using a package identifier
   and simpler flags. Do not use a mapping per scope anymore.
 * Improve related packages definitions with a PackageRelationship
   class using from/to package identifiers

Signed-off-by: Philippe Ombredanne <[email protected]>
  • Loading branch information
pombredanne committed Feb 16, 2018
1 parent 8ab1a9f commit d8de78f
Show file tree
Hide file tree
Showing 1,058 changed files with 23,679 additions and 15,622 deletions.
40 changes: 20 additions & 20 deletions etc/scripts/testdata/livescan/expected.csv
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
Resource,type,name,base_name,extension,date,size,sha1,md5,files_count,mime_type,file_type,programming_language,is_binary,is_text,is_archive,is_media,is_source,is_script,scan_errors,license__key,license__score,license__short_name,license__category,license__owner,license__homepage_url,license__text_url,license__reference_url,license__spdx_license_key,license__spdx_url,start_line,end_line,matched_rule__identifier,matched_rule__license_choice,matched_rule__licenses,copyright,copyright_holder,email,url,package__type,package__name,package__version,package__primary_language,package__code_type,package__description,package__size,package__release_date,package__parties,package__homepage_url,package__download_url,package__bug_tracking_url,package__vcs_repository,package__copyright,package__asserted_license,package__notice_text
/package.json,file,package.json,package,.json,2017-10-03,2200,918376afce796ef90eeda1d6695f2289c90491ac,1f66239a9b850c5e60a9382dbe2162d2,,text/plain,"ASCII text, with very long lines",JSON,False,True,False,False,True,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
/package.json,,,,,,,,,,,,,,,,,,,,mit,15.00,MIT License,Permissive,MIT,http://opensource.org/licenses/mit-license.php,http://opensource.org/licenses/mit-license.php,https://enterprise.dejacode.com/urn/urn:dje:license:mit,MIT,https://spdx.org/licenses/MIT,24,24,mit_27.RULE,False,[u'mit'],,,,,,,,,,,,,,,,,,,,
/package.json,,,,,,,,,,,,,,,,,,,,mit,100.00,MIT License,Permissive,MIT,http://opensource.org/licenses/mit-license.php,http://opensource.org/licenses/mit-license.php,https://enterprise.dejacode.com/urn/urn:dje:license:mit,MIT,https://spdx.org/licenses/MIT,24,24,mit.LICENSE,False,[u'mit'],,,,,,,,,,,,,,,,,,,,
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,23,26,,,,Copyright (c) 2012 LearnBoost &lt [email protected]&gt,,,,,,,,,,,,,,,,,,,
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,23,26,,,,,LearnBoost &lt,,,,,,,,,,,,,,,,,,
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12,12,,,,,,[email protected],,,,,,,,,,,,,,,,,
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,16,16,,,,,,,https://github.com/visionmedia/node-cookie-signature.git,,,,,,,,,,,,,,,,
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,27,27,,,,,,,https://github.com/visionmedia/node-cookie-signature/issues,,,,,,,,,,,,,,,,
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TJ Holowaychuk,,,,,,,
/json2csv.rb,file,json2csv.rb,json2csv,.rb,2017-10-03,1599,6cfb0bd0fb0b784f57164d15bdfca2b734ad87a6,f18e519b77bc7f3e4213215033db3857,,text/x-python,"Python script, ASCII text executable",Ruby,False,True,False,False,True,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
/json2csv.rb,,,,,,,,,,,,,,,,,,,,apache-2.0,98.45,Apache 2.0,Permissive,Apache Software Foundation,http://www.apache.org/licenses/,http://www.apache.org/licenses/LICENSE-2.0,https://enterprise.dejacode.com/urn/urn:dje:license:apache-2.0,Apache-2.0,https://spdx.org/licenses/Apache-2.0,5,24,apache-2.0_scancode.RULE,False,"[u'apache-2.0', u'scancode-acknowledgment']",,,,,,,,,,,,,,,,,,,,
/json2csv.rb,,,,,,,,,,,,,,,,,,,,scancode-acknowledgment,98.45,ScanCode acknowledgment,Permissive,nexB,https://github.com/nexB/scancode-toolkit/,,https://enterprise.dejacode.com/urn/urn:dje:license:scancode-acknowledgment,,,5,24,apache-2.0_scancode.RULE,False,"[u'apache-2.0', u'scancode-acknowledgment']",,,,,,,,,,,,,,,,,,,,
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,3,,,,Copyright (c) 2017 nexB Inc. and others.,,,,,,,,,,,,,,,,,,,
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,3,,,,,nexB Inc. and others.,,,,,,,,,,,,,,,,,,
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,4,,,,,,,http://nexb.com/,,,,,,,,,,,,,,,,
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,4,,,,,,,https://github.com/nexB/scancode-toolkit/,,,,,,,,,,,,,,,,
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10,10,,,,,,,http://apache.org/licenses/LICENSE-2.0,,,,,,,,,,,,,,,,
/license,file,license,license,,2017-10-03,679,75c5490a718ddd45e40e0cc7ce0c756abc373123,b965a762efb9421cf1bf4405f336e278,,text/plain,ASCII text,,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
/license,,,,,,,,,,,,,,,,,,,,gpl-2.0-plus,100.00,GPL 2.0 or later,Copyleft,Free Software Foundation (FSF),http://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html,http://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html,https://enterprise.dejacode.com/urn/urn:dje:license:gpl-2.0-plus,GPL-2.0+,https://spdx.org/licenses/GPL-2.0,1,12,gpl-2.0-plus.LICENSE,False,[u'gpl-2.0-plus'],,,,,,,,,,,,,,,,,,,,
Resource,type,name,base_name,extension,date,size,sha1,md5,files_count,mime_type,file_type,programming_language,is_binary,is_text,is_archive,is_media,is_source,is_script,scan_errors,license__key,license__score,license__short_name,license__category,license__owner,license__homepage_url,license__text_url,license__reference_url,license__spdx_license_key,license__spdx_url,start_line,end_line,matched_rule__identifier,matched_rule__license_choice,matched_rule__licenses,copyright,copyright_holder,email,url,package__type,package__namespace,package__name,package__version,package__qualifiers,package__path,package__primary_language,package__code_type,package__description,package__size,package__release_date,package__parties,package__homepage_url,package__download_url,package__bug_tracking_url,package__vcs_repository,package__copyright,package__asserted_license,package__notice_text
/package.json,file,package.json,package,.json,2017-10-03,2200,918376afce796ef90eeda1d6695f2289c90491ac,1f66239a9b850c5e60a9382dbe2162d2,,text/plain,"ASCII text, with very long lines",JSON,False,True,False,False,True,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
/package.json,,,,,,,,,,,,,,,,,,,,mit,15.00,MIT License,Permissive,MIT,http://opensource.org/licenses/mit-license.php,http://opensource.org/licenses/mit-license.php,https://enterprise.dejacode.com/urn/urn:dje:license:mit,MIT,https://spdx.org/licenses/MIT,24,24,mit_27.RULE,False,[u'mit'],,,,,,,,,,,,,,,,,,,,,,,
/package.json,,,,,,,,,,,,,,,,,,,,mit,100.00,MIT License,Permissive,MIT,http://opensource.org/licenses/mit-license.php,http://opensource.org/licenses/mit-license.php,https://enterprise.dejacode.com/urn/urn:dje:license:mit,MIT,https://spdx.org/licenses/MIT,24,24,mit.LICENSE,False,[u'mit'],,,,,,,,,,,,,,,,,,,,,,,
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,23,26,,,,Copyright (c) 2012 LearnBoost &lt [email protected]&gt,,,,,,,,,,,,,,,,,,,,,,
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,23,26,,,,,LearnBoost &lt,,,,,,,,,,,,,,,,,,,,,
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12,12,,,,,,[email protected],,,,,,,,,,,,,,,,,,,,
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,16,16,,,,,,,https://github.com/visionmedia/node-cookie-signature.git,,,,,,,,,,,,,,,,,,,
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,27,27,,,,,,,https://github.com/visionmedia/node-cookie-signature/issues,,,,,,,,,,,,,,,,,,,
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TJ Holowaychuk,,,,,,,
/json2csv.rb,file,json2csv.rb,json2csv,.rb,2017-10-03,1599,6cfb0bd0fb0b784f57164d15bdfca2b734ad87a6,f18e519b77bc7f3e4213215033db3857,,text/x-python,"Python script, ASCII text executable",Ruby,False,True,False,False,True,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
/json2csv.rb,,,,,,,,,,,,,,,,,,,,apache-2.0,98.45,Apache 2.0,Permissive,Apache Software Foundation,http://www.apache.org/licenses/,http://www.apache.org/licenses/LICENSE-2.0,https://enterprise.dejacode.com/urn/urn:dje:license:apache-2.0,Apache-2.0,https://spdx.org/licenses/Apache-2.0,5,24,apache-2.0_scancode.RULE,False,"[u'apache-2.0', u'scancode-acknowledgment']",,,,,,,,,,,,,,,,,,,,,,,
/json2csv.rb,,,,,,,,,,,,,,,,,,,,scancode-acknowledgment,98.45,ScanCode acknowledgment,Permissive,nexB,https://github.com/nexB/scancode-toolkit/,,https://enterprise.dejacode.com/urn/urn:dje:license:scancode-acknowledgment,,,5,24,apache-2.0_scancode.RULE,False,"[u'apache-2.0', u'scancode-acknowledgment']",,,,,,,,,,,,,,,,,,,,,,,
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,3,,,,Copyright (c) 2017 nexB Inc. and others.,,,,,,,,,,,,,,,,,,,,,,
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,3,,,,,nexB Inc. and others.,,,,,,,,,,,,,,,,,,,,,
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,4,,,,,,,http://nexb.com/,,,,,,,,,,,,,,,,,,,
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,4,,,,,,,https://github.com/nexB/scancode-toolkit/,,,,,,,,,,,,,,,,,,,
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10,10,,,,,,,http://apache.org/licenses/LICENSE-2.0,,,,,,,,,,,,,,,,,,,
/license,file,license,license,,2017-10-03,679,75c5490a718ddd45e40e0cc7ce0c756abc373123,b965a762efb9421cf1bf4405f336e278,,text/plain,ASCII text,,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
/license,,,,,,,,,,,,,,,,,,,,gpl-2.0-plus,100.00,GPL 2.0 or later,Copyleft,Free Software Foundation (FSF),http://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html,http://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html,https://enterprise.dejacode.com/urn/urn:dje:license:gpl-2.0-plus,GPL-2.0+,https://spdx.org/licenses/GPL-2.0,1,12,gpl-2.0-plus.LICENSE,False,[u'gpl-2.0-plus'],,,,,,,,,,,,,,,,,,,,,,,
148 changes: 116 additions & 32 deletions src/packagedcode/maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,29 +42,31 @@
from commoncode import filetype
from commoncode import fileutils
from packagedcode import models
from typecode import contenttype
from textcode import analysis
from typecode import contenttype


logger = logging.getLogger(__name__)
TRACE = False

logger = logging.getLogger(__name__)

if TRACE:
import sys
logging.basicConfig(stream=sys.stdout)
logger.setLevel(logging.DEBUG)


"""
Support Maven2 POMs.
Attempts to resolve Maven properties when possible.
"""


MAVEN_POM_TYPE = 'Apache Maven POM'

class MavenPomPackage(models.Package):
metafiles = ('.pom', 'pom.xml',)
extensions = ('.pom', '.xml',)
type = models.StringType(default=MAVEN_POM_TYPE)

type = models.StringType(default='maven')

primary_language = models.StringType(default='Java')

@classmethod
Expand Down Expand Up @@ -117,7 +119,6 @@ def to_dict(self):


class MavenPom(pom.Pom):

def __init__(self, location=None, text=None):
"""
Build a POM from a location or unicode text.
Expand Down Expand Up @@ -154,6 +155,7 @@ def __init__(self, location=None, text=None):
self.model_version = self._get_attribute('pomVersion')
self.group_id = self._get_attribute('groupId')
self.artifact_id = self._get_attribute('artifactId')
if TRACE: logger.debug('MavenPom.__init__: self.artifact_id: {}'.format(self.artifact_id))
self.version = self._get_attribute('version')
self.classifier = self._get_attribute('classifier')
self.packaging = self._get_attribute('packaging') or 'jar'
Expand Down Expand Up @@ -217,7 +219,6 @@ def _extra_properties(self):
def _replace_props(cls, text, properties):
if not text:
return text

def subfunc(matchobj):
"""Return the replacement value for a matched property key."""
key = matchobj.group(1)
Expand Down Expand Up @@ -276,7 +277,7 @@ def resolve(self, **extra_properties):
properties.update(extra_properties)

if TRACE:
logger.debug('resolve: properties before self-resolution:\n{}'.format(pformat(properties)))
logger.debug('MavenPom.resolve: properties before self-resolution:\n{}'.format(pformat(properties)))

# FIXME: we could remove any property that itself contains
# ${property} as we do not know how to resolve these
Expand All @@ -286,7 +287,7 @@ def resolve(self, **extra_properties):
properties[key] = MavenPom._replace_props(value, properties)

if TRACE:
logger.debug('resolve: used properties:\n{}'.format(pformat(properties)))
logger.debug('MavenPom.resolve: used properties:\n{}'.format(pformat(properties)))

# these attributes are plain strings
plain_attributes = [
Expand Down Expand Up @@ -342,13 +343,21 @@ def resolve(self, **extra_properties):

for scope, dependencies in self.dependencies.items():
resolved_deps = []
# FIXME: this is missing the packaging/type and classifier
for (group, artifact, version,), required in dependencies:
group = self._replace_properties(group, properties)
artifact = self._replace_properties(artifact, properties)
version = self._replace_properties(version, properties)
# skip weird damaged POMs such as
# http://repo1.maven.org/maven2/net/sourceforge/findbugs/coreplugin/1.0.0/coreplugin-1.0.0.pom
if not group or not artifact:
continue
resolved_deps.append(((group, artifact, version,), required))
self._dependencies[scope] = resolved_deps

if TRACE:
logger.debug('MavenPom.resolve: artifactId after resolve: {}'.format(self.artifact_id))

# TODO: add:
# nest dicts
# 'distribution_management',
Expand Down Expand Up @@ -418,7 +427,12 @@ def _get_attribute(self, xpath, xml=None):
if xml is None:
xml = self._xml
attr = xml.findtext(xpath)
return attr and attr.strip() or None
val = attr and attr.strip() or None
if TRACE:
if 'artifactId' in xpath:
logger.debug('MavenPom._get_attribute: xpath: {}'.format(xpath))
logger.debug('MavenPom._get_attribute: xml: {}'.format(xml))
return val

def _get_attributes_list(self, xpath, xml=None):
"""Return a list of text attribute values for a given xpath or None."""
Expand Down Expand Up @@ -547,7 +561,7 @@ def to_dict(self):
('artifact_id', self.artifact_id),
('version', self.version),
('classifier', self.classifier),
('packaging ', self.packaging),
('packaging', self.packaging),

('parent', self.parent.to_dict() if self.parent else {}),

Expand Down Expand Up @@ -678,7 +692,7 @@ def has_basic_pom_attributes(pom):
if TRACE and not basics:
logger.debug(
'has_basic_pom_attributes: not a POM, incomplete GAV: '
'"{}":"{}":"{}"'.format(pom.model_version and pom.group_id and pom.artifact_id))
'"{}":"{}":"{}"'.format(pom.model_version, pom.group_id, pom.artifact_id))
return basics


Expand All @@ -691,6 +705,9 @@ def _get_mavenpom(location=None, text=None, check_is_pom=False, extra_properties
pom.resolve(**extra_properties)
# TODO: we cannot do much without these??
if check_is_pom and not has_basic_pom_attributes(pom):
if TRACE:
logger.debug('_get_mavenpom: has_basic_pom_attributes: {}'.format(has_basic_pom_attributes(pom)))

return
return pom

Expand All @@ -709,6 +726,7 @@ def parse(location=None, text=None, check_is_pom=True, extra_properties=None):
return

pom = mavenpom.to_dict()
if TRACE: logger.debug('parse: pom:.to_dict()\n{}'.format(pformat(pom)))

# join all data in a single text
asserted_license = []
Expand Down Expand Up @@ -738,30 +756,97 @@ def parse(location=None, text=None, check_is_pom=True, extra_properties=None):
url=cont['url'],
))

name = pom['organization_name']
url = pom['organization_url']
if name or url:
parties.append(models.Party(type=models.party_org, name=name, role='owner', url=url))
party_name = pom['organization_name']
party_url = pom['organization_url']
if party_name or party_url:
parties.append(models.Party(type=models.party_org, name=party_name, role='owner', url=party_url))

dependencies = OrderedDict()
dependencies = []
for scope, deps in pom['dependencies'].items():
scoped_deps = dependencies[scope] = []
if TRACE: logger.debug('parse: dependencies.deps: {}'.format(deps))

if scope:
scope = scope.strip().lower()
if not scope:
# maven default
scope = 'compile'

for dep in deps:
scoped_deps.append(models.BasePackage(
type=MAVEN_POM_TYPE,
name='{group_id}:{artifact_id}'.format(**dep),
version=dep['version'],
))
dgroup_id = dep['group_id']
dartifact_id = dep['artifact_id']
dversion = dep['version']
drequired = dep['required']

if TRACE:
logger.debug('parse: dependencies.deps: {}, {}, {}, {}'.format(
dgroup_id, dartifact_id, dversion, drequired))

# pymaven whart
if dversion == 'latest.release':
dversion = None

dqualifiers = {}
# FIXME: this is missing from the original Pom parser
# classifier = dep.get('classifier')
# if classifier:
# qualifiers['classifier'] = classifier
#
# packaging = dep.get('type')
# if packaging and packaging != 'jar':
# qualifiers['packaging'] = packaging

dep_id = models.PackageIdentifier(
type='maven',
namespace=dgroup_id,
name=dartifact_id,
qualifiers=dqualifiers or None,
)
# TODO: handle dependency management and pom type
is_runtime = scope in ('runtime', 'compile', 'system', 'provided')
is_optional = bool(scope in ('test',) or not drequired)

dep_pack = models.DependentPackage(
identifier=str(dep_id),
requirement=dversion,
scope=scope,
is_runtime=is_runtime,
is_optional=is_optional,
is_resolved=False,
)
dependencies.append(dep_pack)

# FIXME: there are still a lot of other data to map in a Package
version = pom['version']
# pymaven whart
if version == 'latest.release':
version = None

artifact_id = pom['artifact_id']
qualifiers = {}
classifier = pom['classifier']
if classifier:
qualifiers['classifier'] = classifier

packaging = pom['packaging']
if packaging and packaging != 'jar':
qualifiers['packaging'] = packaging

pname = pom['name']
pdesc = pom['description']
if pname==pdesc:
description = pname
else:
description = [d for d in (pom['name'], pom['description']) if d]
description = '\n'.join(description)

package = MavenPomPackage(
# FIXME: what is this location about?
location=location,
name='{group_id}:{artifact_id}'.format(**pom),
version=pom['version'],
description=pom['description'],
homepage_url=pom['url'],
asserted_license=asserted_license,
namespace=pom['group_id'],
name=artifact_id,
version=version,
qualifiers=qualifiers or None,
description=description or None,
homepage_url=pom['url'] or None,
asserted_license=asserted_license or None,
parties=parties,
dependencies=dependencies,
)
Expand All @@ -772,7 +857,6 @@ class MavenRecognizer(object):
"""
A package recognizer for Maven-based packages.
"""

def __init__(self):
return NotImplementedError()

Expand Down
Loading

0 comments on commit d8de78f

Please sign in to comment.