Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions minecode/collectors/pub.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging
import requests
from packageurl import PackageURL

from minecode.miners.pub import build_packages
from minecode import priority_router
from packagedb.models import PackageContentType

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# StreamHandler() with no stream argument writes to sys.stderr.
handler = logging.StreamHandler()
logger.addHandler(handler)
# Records at INFO level and above are processed by this logger.
logger.setLevel(logging.INFO)


def get_pub_package_json(name, version=None, timeout=30):
    """
    Return the metadata JSON for a package from pub.dev API.
    Example: https://pub.dev/api/packages/flutter

    When `version` is provided, the single-version endpoint
    ``/api/packages/<name>/versions/<version>`` is queried instead of the
    all-versions endpoint.

    `timeout` is the number of seconds passed to ``requests.get`` so that a
    stalled connection cannot block the caller forever.

    Return None (after logging the error) when the server answers with an
    HTTP error status. Other ``requests`` exceptions, such as connection
    errors and timeouts, are not caught here and propagate to the caller.
    """
    if not version:
        url = f"https://pub.dev/api/packages/{name}"
    else:
        url = f"https://pub.dev/api/packages/{name}/versions/{version}"

    try:
        # Without an explicit timeout, requests waits indefinitely.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as err:
        logger.error("HTTP error occurred: %s", err)

    return None


def map_pub_package(package_url, pipelines, priority=0):
    """
    Add a pub `package_url` to the PackageDB.

    Fetch the pub.dev metadata for `package_url`, build one package per
    version described by that metadata, store each one with
    ``merge_or_create_package`` and add every stored package to the scan
    queue with the given `pipelines` and `priority`.

    Return an error message string when the metadata cannot be fetched or
    when storing a package reports an error (processing stops at the first
    such error). Return None otherwise.
    """
    from minecode.model_utils import add_package_to_scan_queue, merge_or_create_package

    name = package_url.name
    package_json = get_pub_package_json(name=name, version=package_url.version)

    if not package_json:
        error = f"Package does not exist on pub.dev: {package_url}"
        logger.error(error)
        return error

    packages = build_packages(package_json, package_url)
    error = None
    for package in packages:
        package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
        db_package, _, _, error = merge_or_create_package(package, visit_level=0)
        if error:
            break
        # Queue each stored package as soon as it is available, so every
        # version is scanned and nothing is referenced when no package was
        # built at all.
        if db_package:
            add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)

    return error


@priority_router.route("pkg:pub/.*")
def process_request(purl_str, **kwargs):
    """
    Handle a priority queue request for a pub Package URL string `purl_str`.

    Recognized keyword arguments:
    - ``addon_pipelines``: extra pipelines appended to DEFAULT_PIPELINES.
    - ``priority``: priority forwarded to the mapper (defaults to 0).

    Return the error message reported by ``map_pub_package``, or None when
    no error was reported.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    extra_pipelines = tuple(kwargs.get("addon_pipelines", []))
    all_pipelines = DEFAULT_PIPELINES + extra_pipelines
    requested_priority = kwargs.get("priority", 0)

    parsed_purl = PackageURL.from_string(purl_str)
    outcome = map_pub_package(parsed_purl, all_pipelines, requested_priority)

    # A falsy outcome means no error was reported.
    return outcome or None
75 changes: 75 additions & 0 deletions minecode/miners/pub.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
#

from packageurl import PackageURL
from packagedcode import models as scan_models


def build_single_package(version_info, package_name):
    """
    Return a PackageData object built from one pub.dev version entry.

    `version_info` is a mapping with a "version" key and an optional
    "pubspec" mapping, as found under "versions" in the package listing or as
    returned by https://pub.dev/api/packages/<name>/versions/<version>

    The description, homepage, repository, issue tracker and license are read
    from the pubspec. A license value of "unknown" (any letter case) is
    ignored.
    """
    version = version_info.get("version")
    # A missing or null "pubspec" is treated as an empty mapping.
    pubspec = version_info.get("pubspec", {}) or {}

    declared_license = pubspec.get("license")
    if declared_license and declared_license.lower() != "unknown":
        license_statements = [declared_license]
    else:
        license_statements = []

    package_data = dict(
        datasource_id="pub_pkginfo",
        type="pub",
        download_url=f"https://pub.dev/packages/{package_name}/versions/{version}.tar.gz",
        name=package_name,
        version=version,
        description=pubspec.get("description"),
        homepage_url=pubspec.get("homepage"),
        repository_homepage_url=pubspec.get("repository"),
        bug_tracking_url=pubspec.get("issue_tracker"),
        extracted_license_statement=license_statements,
        parties=[],
    )

    built = scan_models.PackageData.from_data(package_data)
    # The datasource id is replaced after the object has been created.
    built.datasource_id = "pub_api_metadata"
    built.set_purl(PackageURL(type="pub", name=package_name, version=version))
    return built


def build_packages(metadata_dict, purl):
    """
    Yield PackageData objects built from pub.dev metadata.

    `purl` may be a PackageURL or a purl string. When it carries a version,
    `metadata_dict` is treated as a single-version API response and exactly
    one package is yielded. Otherwise one package is yielded for each entry
    under the "versions" key of `metadata_dict`.
    """
    if isinstance(purl, str):
        purl = PackageURL.from_string(purl)

    requested_version = purl.version
    name = purl.name

    if not requested_version:
        # All-versions response: one package per listed version.
        yield from (
            build_single_package(entry, name)
            for entry in metadata_dict.get("versions", [])
        )
        return

    yield build_single_package(metadata_dict, name)
6 changes: 4 additions & 2 deletions minecode/model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,8 +391,10 @@ def merge_or_create_package(scanned_package, visit_level, override=False, filena

stringify_null_purl_fields(package_data)

created_package = Package.objects.create(**package_data)
created_package.append_to_history(f"New Package created from URI: {package_uri}")
# if we try to create a package more than once it should not fail
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JonoYang please check this change once! thanks!

created_package, created = Package.objects.get_or_create(**package_data)
if created:
created_package.append_to_history(f"New Package created from URI: {package_uri}")

# This is used in the case of Maven packages created from the priority queue
for h in history:
Expand Down
58 changes: 58 additions & 0 deletions minecode/tests/collectors/test_pub.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import json
import os

from django.test import TestCase as DjangoTestCase
from packageurl import PackageURL

import packagedb
from minecode.collectors import pub
from minecode.utils_test import JsonBasedTesting


class PubPriorityQueueTests(JsonBasedTesting, DjangoTestCase):
    """
    Tests for the pub collector: metadata fetching and mapping of a pub
    Package URL to a Package in the database.
    """

    test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles")

    def setUp(self):
        super().setUp()
        # Single-version API response for flutter 0.0.1, used as canned data.
        self.expected_json_loc = self.get_test_loc("pub/flutter.json")
        with open(self.expected_json_loc) as f:
            self.expected_json_contents = json.load(f)

    def test_get_pub_package_json(self):
        """
        Verify get_pub_package_json() returns expected keys for a pub package.

        NOTE: this test queries the live pub.dev API.
        """
        json_contents = pub.get_pub_package_json(name="flutter")
        self.assertIn("name", json_contents)
        self.assertEqual("flutter", json_contents["name"])
        self.assertIn("versions", json_contents)

    def test_map_pub_package(self):
        """
        Verify map_pub_package() creates a Package in the DB with correct PURL
        and download URL.
        """
        from unittest import mock

        package_count = packagedb.models.Package.objects.all().count()
        self.assertEqual(0, package_count)

        package_url = PackageURL.from_string("pkg:pub/flutter@0.0.1")
        # Serve the canned single-version response instead of calling
        # pub.dev, so the test does not depend on network access.
        with mock.patch.object(
            pub,
            "get_pub_package_json",
            return_value=self.expected_json_contents,
        ) as mocked_fetch:
            pub.map_pub_package(package_url, ("test_pipeline",))
        mocked_fetch.assert_called_once_with(name="flutter", version="0.0.1")

        package_count = packagedb.models.Package.objects.all().count()
        self.assertEqual(1, package_count)

        package = packagedb.models.Package.objects.all().first()
        expected_purl_str = "pkg:pub/flutter@0.0.1"
        expected_download_url = "https://pub.dev/packages/flutter/versions/0.0.1.tar.gz"

        self.assertEqual(expected_purl_str, package.purl)
        self.assertEqual(expected_download_url, package.download_url)
16 changes: 16 additions & 0 deletions minecode/tests/testfiles/pub/flutter.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"version": "0.0.1",
"pubspec": {
"environment": {
"sdk": ">=1.12.0 <2.0.0"
},
"homepage": "http://flutter.io",
"version": "0.0.1",
"name": "flutter",
"author": "Flutter Authors <[email protected]>",
"description": "A framework for writing Flutter applications"
},
"archive_url": "https://pub.dev/api/archives/flutter-0.0.1.tar.gz",
"archive_sha256": "aec09e0c68fe848fc37089e29a64cf8dbc1e232e1e98e05af9b68114c699447d",
"published": "2015-09-19T17:58:43.990Z"
}
Loading