Skip to content

Commit 8dde864

Browse files
authored
Add collector for pub ecosystem (#712)
Signed-off-by: Tushar Goel <[email protected]>
1 parent eabef3a commit 8dde864

File tree

5 files changed

+239
-2
lines changed

5 files changed

+239
-2
lines changed

minecode/collectors/pub.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
import requests
12+
from packageurl import PackageURL
13+
14+
from minecode.miners.pub import build_packages
15+
from minecode import priority_router
16+
from packagedb.models import PackageContentType
17+
18+
# Module-level logging setup: this module's logger gets its own stream
# handler and emits records at INFO level and above.
handler = logging.StreamHandler()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(handler)
22+
23+
24+
def get_pub_package_json(name, version=None, timeout=30):
    """
    Return the metadata JSON for a package from pub.dev API, or None when the
    request fails or the response is not valid JSON.

    With only `name`, the package endpoint is queried; when `version` is also
    given, the endpoint for that single version is queried instead.
    Example: https://pub.dev/api/packages/flutter

    `timeout` is the number of seconds passed to `requests.get` so that an
    unresponsive server cannot block the caller forever.
    """
    if not version:
        url = f"https://pub.dev/api/packages/{name}"
    else:
        url = f"https://pub.dev/api/packages/{name}/versions/{version}"

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as err:
        logger.error(f"HTTP error occurred: {err}")
    except (requests.exceptions.RequestException, ValueError) as err:
        # Connection failures, timeouts and undecodable bodies are reported
        # the same way as HTTP errors: log and fall through to return None.
        logger.error(f"Request to {url} failed: {err}")
    return None
41+
42+
def map_pub_package(package_url, pipelines, priority=0):
    """
    Add a pub `package_url` to the PackageDB.

    Fetch the pub.dev metadata for `package_url`, build package data from it,
    then create or merge each package in the database and submit it to the
    scan queue with the given `pipelines` and `priority`.

    Return an error message when the metadata cannot be fetched. Otherwise
    return the error reported by the last `merge_or_create_package` call,
    which stops the loop as soon as it is truthy, or None when no package
    was built.
    """
    # Function-scope import; presumably avoids a circular import between
    # minecode.model_utils and the collectors -- confirm before moving it.
    from minecode.model_utils import add_package_to_scan_queue, merge_or_create_package

    name = package_url.name
    package_json = get_pub_package_json(name=name, version=package_url.version)

    if not package_json:
        error = f"Package does not exist on pub.dev: {package_url}"
        logger.error(error)
        return error

    packages = build_packages(package_json, package_url)
    error = None
    for package in packages:
        package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
        db_package, _, _, error = merge_or_create_package(package, visit_level=0)
        if error:
            break

        # Use the module logger rather than printing to stdout.
        logger.debug("Created or merged package: %s", db_package)

        # Submit the package for scanning.
        if db_package:
            add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)

    return error
68+
69+
70+
@priority_router.route("pkg:pub/.*")
def process_request(purl_str, **kwargs):
    """
    Handle a priority request for `purl_str`, a pub Package URL (PURL) string.

    The optional `addon_pipelines` keyword is appended to the default
    pipelines and the optional `priority` keyword (default 0) is forwarded
    to the scan queue. Return the error message from mapping the package
    when there is one, otherwise None.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    pipelines = DEFAULT_PIPELINES + tuple(kwargs.get("addon_pipelines", []))
    priority = kwargs.get("priority", 0)

    failure = map_pub_package(PackageURL.from_string(purl_str), pipelines, priority)
    if not failure:
        return None
    return failure

minecode/miners/pub.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
#
6+
7+
from packageurl import PackageURL
8+
from packagedcode import models as scan_models
9+
10+
11+
def build_single_package(version_info, package_name):
    """
    Build a single PackageData object from pub.dev version metadata.

    `version_info` is a dict, as returned under "versions" or from
    https://pub.dev/api/packages/<name>/versions/<version>
    `package_name` is the name used for the package and its PURL.

    The "archive_sha256" value of `version_info`, when present, is stored as
    the package sha256 checksum.
    """
    version = version_info.get("version")
    pubspec = version_info.get("pubspec") or {}

    description = pubspec.get("description")
    homepage_url = pubspec.get("homepage")
    repository_url = pubspec.get("repository")
    issue_tracker = pubspec.get("issue_tracker")
    license_decl = pubspec.get("license")

    extracted_license_statement = []
    # Only a non-empty string other than "unknown" is kept as a license
    # statement; a non-string value would otherwise fail on `.lower()`.
    if (
        isinstance(license_decl, str)
        and license_decl
        and license_decl.lower() != "unknown"
    ):
        extracted_license_statement.append(license_decl)

    common_data = dict(
        name=package_name,
        version=version,
        description=description,
        homepage_url=homepage_url,
        repository_homepage_url=repository_url,
        bug_tracking_url=issue_tracker,
        extracted_license_statement=extracted_license_statement,
        parties=[],
    )

    archive_url = f"https://pub.dev/packages/{package_name}/versions/{version}.tar.gz"

    download_data = dict(
        datasource_id="pub_pkginfo",
        type="pub",
        download_url=archive_url,
        # Checksum of the archive as reported by the pub.dev API, if any.
        sha256=version_info.get("archive_sha256"),
    )
    download_data.update(common_data)

    package = scan_models.PackageData.from_data(download_data)
    # The datasource id is replaced after construction, not passed to
    # from_data(). NOTE(review): presumably from_data() needs a datasource id
    # it recognizes -- confirm before collapsing the two ids into one.
    package.datasource_id = "pub_api_metadata"
    package.set_purl(PackageURL(type="pub", name=package_name, version=version))

    return package
55+
56+
57+
def build_packages(metadata_dict, purl):
    """
    Yield PackageData objects built from the pub.dev `metadata_dict`.

    `purl` is a PackageURL or a PURL string. When it carries a version,
    `metadata_dict` is treated as the single-version API response and exactly
    one package is yielded. Otherwise one package is yielded for each entry
    under the "versions" key of the all-versions API response.
    """
    if isinstance(purl, str):
        purl = PackageURL.from_string(purl)

    requested_version = purl.version
    name = purl.name

    if requested_version:
        yield build_single_package(metadata_dict, name)
        return

    for entry in metadata_dict.get("versions", []):
        yield build_single_package(entry, name)

minecode/model_utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -391,8 +391,10 @@ def merge_or_create_package(scanned_package, visit_level, override=False, filena
391391

392392
stringify_null_purl_fields(package_data)
393393

394-
created_package = Package.objects.create(**package_data)
395-
created_package.append_to_history(f"New Package created from URI: {package_uri}")
394+
# if we try to create a package more than once it should not fail
395+
created_package, created = Package.objects.get_or_create(**package_data)
396+
if created:
397+
created_package.append_to_history(f"New Package created from URI: {package_uri}")
396398

397399
# This is used in the case of Maven packages created from the priority queue
398400
for h in history:
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0
6+
# See https://github.com/nexB/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import json
11+
import os
12+
13+
from django.test import TestCase as DjangoTestCase
14+
from packageurl import PackageURL
15+
16+
import packagedb
17+
from minecode.collectors import pub
18+
from minecode.utils_test import JsonBasedTesting
19+
20+
21+
class PubPriorityQueueTests(JsonBasedTesting, DjangoTestCase):
    """
    Tests for the pub collector: fetching package metadata and mapping a pub
    PURL to a Package in the database.
    """

    test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles")

    def setUp(self):
        super().setUp()
        # Load the stored pub.dev response for flutter. It is kept on the
        # instance but not read by the tests in this class.
        self.expected_json_loc = self.get_test_loc("pub/flutter.json")
        with open(self.expected_json_loc) as f:
            self.expected_json_contents = json.load(f)

    def test_get_pub_package_json(self):
        """
        Verify get_pub_package_json() returns expected keys for a pub package.
        """
        # NOTE(review): the collector is called without mocking, so this
        # presumably needs network access to pub.dev -- confirm.
        json_contents = pub.get_pub_package_json(name="flutter")
        self.assertIn("name", json_contents)
        self.assertEqual("flutter", json_contents["name"])
        self.assertIn("versions", json_contents)

    def test_map_pub_package(self):
        """
        Verify map_pub_package() creates a Package in the DB with correct PURL
        and download URL.
        """
        package_count = packagedb.models.Package.objects.all().count()
        self.assertEqual(0, package_count)

        package_url = PackageURL.from_string("pkg:pub/flutter@0.0.1")
        pub.map_pub_package(package_url, ("test_pipeline",))

        package_count = packagedb.models.Package.objects.all().count()
        self.assertEqual(1, package_count)

        package = packagedb.models.Package.objects.all().first()
        expected_purl_str = "pkg:pub/flutter@0.0.1"
        expected_download_url = "https://pub.dev/packages/flutter/versions/0.0.1.tar.gz"

        self.assertEqual(expected_purl_str, package.purl)
        self.assertEqual(expected_download_url, package.download_url)
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"version": "0.0.1",
3+
"pubspec": {
4+
"environment": {
5+
"sdk": ">=1.12.0 <2.0.0"
6+
},
7+
"homepage": "http://flutter.io",
8+
"version": "0.0.1",
9+
"name": "flutter",
10+
"author": "Flutter Authors <flutter-dev@googlegroups.com>",
11+
"description": "A framework for writing Flutter applications"
12+
},
13+
"archive_url": "https://pub.dev/api/archives/flutter-0.0.1.tar.gz",
14+
"archive_sha256": "aec09e0c68fe848fc37089e29a64cf8dbc1e232e1e98e05af9b68114c699447d",
15+
"published": "2015-09-19T17:58:43.990Z"
16+
}

0 commit comments

Comments
 (0)