Skip to content

Commit 17a1d4c

Browse files
committed
Use GraphQL API for GitHub repo
Signed-off-by: Keshav Priyadarshi <[email protected]>
1 parent 81454ca commit 17a1d4c

File tree

1 file changed

+125
-11
lines changed

1 file changed

+125
-11
lines changed

src/fetchcode/package_versions.py

+125-11
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import dataclasses
1818
import logging
19+
import os
1920
import traceback
2021
import xml.etree.ElementTree as ET
2122
from datetime import datetime
@@ -24,11 +25,11 @@
2425
from urllib.parse import urlparse
2526

2627
import requests
28+
import yaml
2729
from dateutil import parser as dateparser
2830
from packageurl import PackageURL
2931
from packageurl.contrib.route import NoRouteAvailable
3032
from packageurl.contrib.route import Router
31-
import yaml
3233

3334
logger = logging.getLogger(__name__)
3435

@@ -270,15 +271,8 @@ def get_conan_versions_from_purl(purl):
270271
def get_github_versions_from_purl(purl):
    """
    Yield PackageVersion for the ``github`` package ``purl`` string using the
    GitHub GraphQL API.

    Versions are the Git tags of the repository as yielded by
    ``fetch_github_tags_gql`` and not the GitHub "releases" of the repository.
    """
    purl = PackageURL.from_string(purl)

    yield from fetch_github_tags_gql(purl)
282276

283277

284278
@router.route("pkg:golang/.*")
@@ -336,7 +330,7 @@ def trim_go_url_path(url_path: str) -> Optional[str]:
336330
# some advisories contains this prefix in package name, e.g. https://github.com/advisories/GHSA-7h6j-2268-fhcm
337331
go_url_prefix = "https://pkg.go.dev/"
338332
if url_path.startswith(go_url_prefix):
339-
url_path = url_path[len(go_url_prefix):]
333+
url_path = url_path[len(go_url_prefix) :]
340334

341335
parsed_url_path = urlparse(url_path)
342336
path = parsed_url_path.path
@@ -521,3 +515,123 @@ def get_response(url, content_type="json", headers=None):
521515
def remove_debian_default_epoch(version):
    """
    Return the Debian ``version`` string without its default "0:" epoch prefix.

    Only a leading "0:" is removed: a non-default epoch that merely ends with
    a zero (such as "10:1.2") is returned unchanged. A falsy ``version``
    (None or an empty string) is returned as-is.
    """
    default_epoch = "0:"
    if version and version.startswith(default_epoch):
        return version[len(default_epoch) :]
    return version
518+
519+
520+
def fetch_github_tags_gql(purl):
    """
    Yield PackageVersion for given github ``purl`` using the GitHub GQL API.

    ``purl`` is forwarded unchanged to ``fetch_github_tag_nodes``. Each
    yielded PackageVersion uses the Git tag name as its value and the parsed
    commit date of the tagged commit as its release date. The release date is
    None when no commit date is available for a tag.
    """
    for node in fetch_github_tag_nodes(purl):
        target = node.get("target") or {}

        # For an annotated (e.g. signed) tag the ref points to a Tag object
        # and the commit info is nested one level deeper in target["target"].
        # A ref may also point to an object that is neither a commit nor a
        # tag: the mapping is then empty and there is no date to report.
        if "committedDate" not in target:
            target = target.get("target") or {}

        committed_date = target.get("committedDate")
        release_date = dateparser.parse(committed_date) if committed_date else None

        yield PackageVersion(value=node["name"], release_date=release_date)
538+
539+
540+
def fetch_github_tag_nodes(purl):
    """
    Yield node name/target mappings for Git tags of the ``purl``.

    The tags are fetched in pages of 100 through ``github_response`` until
    the API reports that there is no next page.

    Each node has this shape:
        {
          "name": "v2.6.24-rc5",
          "target": {
            "target": {
              "committedDate": "2007-12-11T03:48:43Z"
            }
          }
        },
    """
    tags_query = """
    query getTags($name: String!, $owner: String!, $after: String)
    {
        repository(name: $name, owner: $owner) {
            refs(refPrefix: "refs/tags/", first: 100, after: $after) {
                totalCount
                pageInfo {
                    endCursor
                    hasNextPage
                }
                nodes {
                    name
                    target {
                        ... on Commit {
                            committedDate
                        }
                        ... on Tag {
                            target {
                                ... on Commit {
                                    committedDate
                                }
                            }
                        }
                    }
                }
            }
        }
    }"""

    query_variables = {"owner": purl.namespace, "name": purl.name}
    # The payload keeps a reference to ``query_variables``: updating the
    # cursor below is therefore visible to the next request.
    payload = {"query": tags_query, "variables": query_variables}

    more_pages = True
    while more_pages:
        refs = github_response(payload)["data"]["repository"]["refs"]
        yield from refs["nodes"]

        page_info = refs["pageInfo"]
        more_pages = page_info["hasNextPage"]
        if more_pages:
            # The next page starts right after the last item of this page.
            query_variables["after"] = page_info["endCursor"]
604+
605+
606+
class GitHubTokenError(Exception):
    """Raised when the GitHub API token is not set or is rejected by GitHub."""
608+
609+
610+
class GraphQLError(Exception):
    """Raised when a GitHub GraphQL API response reports ``errors``."""
612+
613+
614+
def github_response(graphql_query, timeout=30):
    """
    Return the decoded JSON response mapping obtained by POSTing the
    ``graphql_query`` mapping to the GitHub GraphQL API endpoint.

    The request is authenticated with the token found in the GH_TOKEN
    environment variable. ``timeout`` is the number of seconds passed to
    ``requests`` as the request timeout, so that a stalled connection does
    not block forever.

    Raise a GitHubTokenError if the GH_TOKEN environment variable is not set
    or empty, or if GitHub reports "Bad credentials".
    Raise a GraphQLError carrying the reported errors if the response contains
    a non-empty ``errors`` entry.
    """
    gh_token = os.environ.get("GH_TOKEN")

    if not gh_token:
        msg = (
            "GitHub API Token Not Set\n"
            "Set your GitHub token in the GH_TOKEN environment variable."
        )
        raise GitHubTokenError(msg)

    headers = {"Authorization": f"bearer {gh_token}"}

    endpoint = "https://api.github.com/graphql"
    response = requests.post(
        endpoint,
        headers=headers,
        json=graphql_query,
        timeout=timeout,
    ).json()

    # An authentication failure is reported in a top-level "message" rather
    # than in the GraphQL "errors" list.
    message = response.get("message")
    if message == "Bad credentials":
        raise GitHubTokenError(f"Invalid GitHub token: {message}")

    errors = response.get("errors")
    if errors:
        raise GraphQLError(errors)

    return response

0 commit comments

Comments
 (0)