Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions minecode/collectors/composer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging
import requests
from packageurl import PackageURL
from minecode.miners.composer import build_packages

from minecode import priority_router
from packagedb.models import PackageContentType

# Module-level logging setup: records at INFO and above from this collector are
# emitted through a dedicated StreamHandler (created without an explicit stream,
# so the logging default stream is used).
handler = logging.StreamHandler()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(handler)


def get_composer_package_json(name, timeout=30):
    """
    Return the contents of the JSON file of the package from Packagist, or
    None when the metadata could not be fetched or decoded.
    Example: https://repo.packagist.org/p2/laravel/laravel.json

    `name` is the Packagist package name, such as "laravel/laravel".
    `timeout` is the number of seconds to wait for the server before giving
    up; without it a stalled connection would block the caller forever.

    Failures are logged rather than raised, so callers only need to check for
    a falsy return value.
    """
    url = f"https://repo.packagist.org/p2/{name}.json"

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as err:
        logger.error(f"HTTP error occurred: {err}")
    except (requests.exceptions.RequestException, ValueError) as err:
        # Connection failures, timeouts and bodies that are not valid JSON.
        logger.error(f"Failed to fetch {url}: {err}")

    return None


def map_composer_package(package_url, pipelines, priority=0):
    """
    Add a composer `package_url` to the PackageDB.

    The Packagist metadata for the package is fetched, turned into package
    objects with build_packages(), and each one is merged into or created in
    the database. Every package stored this way is submitted to the scan queue
    with `pipelines` and `priority`.

    Return an error message when the metadata could not be obtained, or the
    error reported by merge_or_create_package() for the first package that
    failed (processing stops there). Return None otherwise.
    """
    from minecode.model_utils import add_package_to_scan_queue
    from minecode.model_utils import merge_or_create_package

    # Packagist identifies packages as "vendor/name"; fall back to the bare
    # name when the PURL carries no namespace.
    if package_url.namespace:
        packagist_name = f"{package_url.namespace}/{package_url.name}"
    else:
        packagist_name = package_url.name

    metadata = get_composer_package_json(name=packagist_name)
    if not metadata:
        error = f"Package does not exist on packagist.org: {package_url}"
        logger.error(error)
        return error

    error = None
    for package in build_packages(metadata, package_url):
        package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
        db_package, _, _, error = merge_or_create_package(package, visit_level=0)
        if error:
            break

        if db_package:
            add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)

    return error


@priority_router.route("pkg:composer/.*")
def process_request(purl_str, **kwargs):
    """
    Process `priority_resource_uri` containing a composer Package URL (PURL).

    Optional keyword arguments read here:
    - `addon_pipelines`: iterable of pipeline names appended to
      DEFAULT_PIPELINES (defaults to none).
    - `priority`: priority passed along for the scan queue (defaults to 0).

    Return the error message from map_composer_package() if there is one,
    otherwise None.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    extra_pipelines = tuple(kwargs.get("addon_pipelines", []))
    all_pipelines = DEFAULT_PIPELINES + extra_pipelines
    scan_priority = kwargs.get("priority", 0)

    parsed_purl = PackageURL.from_string(purl_str)
    error_msg = map_composer_package(parsed_purl, all_pipelines, scan_priority)

    if not error_msg:
        return None
    return error_msg
74 changes: 74 additions & 0 deletions minecode/miners/composer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

from packagedcode import models as scan_models


def _expand_minified_versions(versions):
    """
    Return a list of complete version mappings built from `versions`, a list
    minified in the Composer 2 metadata format: every entry after the first
    only carries the keys that differ from the entry before it, and the
    "__unset" marker value means that the key was removed.
    """
    expanded = []
    current = None
    for version_diff in versions:
        if current is None:
            current = dict(version_diff)
        else:
            # Start from a copy of the previous full entry and apply the diff.
            current = dict(current)
            for key, value in version_diff.items():
                if value == "__unset":
                    current.pop(key, None)
                else:
                    current[key] = value
        expanded.append(current)
    return expanded


def build_packages(metadata_dict, purl):
    """
    Yield ScannedPackage built from packagist.org API.

    `metadata_dict` is the decoded Packagist metadata and `purl` is the
    PackageURL of the requested package. When `purl` has a version, only the
    entries whose "version" or "version_normalized" equals it are considered.
    Entries without a dist download URL are skipped.

    metadata_dict format:
    {
        "minified": "composer/2.0",
        "packages": {
            "vendor/package": [
                { version metadata... }
            ]
        }
    }

    When the "minified" marker is present, the version entries are expanded
    first so that each yielded package gets its complete metadata and not only
    the keys that changed since the previous entry.
    """
    purl_version = purl.version
    package_name = f"{purl.namespace}/{purl.name}" if purl.namespace else purl.name

    packages = metadata_dict.get("packages") or {}
    versions = packages.get(package_name) or []
    if metadata_dict.get("minified") == "composer/2.0":
        versions = _expand_minified_versions(versions)

    for version_info in versions:
        version = version_info.get("version")
        version_normalized = version_info.get("version_normalized")
        if purl_version and purl_version not in (version, version_normalized):
            continue

        # "dist" and "source" may be present with a null value: use `or {}`
        # rather than a .get() default, which only covers a missing key.
        dist = version_info.get("dist") or {}
        download_url = dist.get("url")
        if not download_url:
            # Nothing to download, so there is no package to report.
            continue

        source = version_info.get("source") or {}
        authors = version_info.get("authors") or []
        parties = [
            scan_models.Party(name=author.get("name"), role="author")
            for author in authors
        ]

        package_data = dict(
            datasource_id="composer_pkginfo",
            type="composer",
            download_url=download_url,
            sha1=dist.get("shasum"),
            name=purl.name,
            version=version,
            description=version_info.get("description"),
            homepage_url=version_info.get("homepage"),
            repository_homepage_url=source.get("url"),
            extracted_license_statement=version_info.get("license") or [],
            parties=parties,
        )
        package = scan_models.PackageData.from_data(package_data)
        # The package is created with the "composer_pkginfo" datasource id and
        # relabelled afterwards, as this data comes from the Packagist API.
        package.datasource_id = "composer_api_metadata"
        package.set_purl(purl)
        yield package
61 changes: 61 additions & 0 deletions minecode/tests/collectors/test_composer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import json
import os

from django.test import TestCase as DjangoTestCase
from packageurl import PackageURL

import packagedb
from minecode.collectors import composer
from minecode.utils_test import JsonBasedTesting


class ComposerPriorityQueueTests(JsonBasedTesting, DjangoTestCase):
    """
    Tests for the composer collector: fetching Packagist metadata and mapping
    a composer PURL to a Package stored in the database.
    """

    test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles")

    def setUp(self):
        super().setUp()
        # Sample Packagist metadata for laravel/laravel
        self.expected_json_loc = self.get_test_loc("composer/laravel-laravel.json")
        with open(self.expected_json_loc) as fixture:
            self.expected_json_contents = json.load(fixture)

    def test_get_package_json(self):
        """
        Check that get_composer_package_json() returns metadata with a
        "packages" mapping that has an entry for laravel/laravel.
        """
        fetched = composer.get_composer_package_json(name="laravel/laravel")
        self.assertIn("packages", fetched)
        self.assertIn("laravel/laravel", fetched["packages"])

    def test_map_composer_package(self):
        """
        Check that map_composer_package() stores exactly one Package whose
        PURL matches the requested one and whose download URL is an https
        URL mentioning laravel.
        """
        package_model = packagedb.models.Package
        self.assertEqual(0, package_model.objects.all().count())

        # NOTE(review): the PURL literals below look mangled by e-mail
        # obfuscation in this rendering of the file - confirm the intended
        # "name@version" text against the repository.
        package_url = PackageURL.from_string("pkg:composer/laravel/[email protected]")
        composer.map_composer_package(package_url, ("test_pipeline",))

        self.assertEqual(1, package_model.objects.all().count())

        package = package_model.objects.all().first()
        expected_purl_str = "pkg:composer/laravel/[email protected]"
        self.assertEqual(expected_purl_str, package.purl)

        # dist.url from Packagist metadata is expected to be something like:
        # https://api.github.com/repos/laravel/laravel/zipball/<commit>
        download_url = package.download_url
        self.assertTrue(download_url.startswith("https://"))
        self.assertIn("laravel", download_url.lower())
Loading