Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Scaladex API integration #379

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 59 additions & 71 deletions coordinator/src/main/scala/Scaladex.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,84 +3,72 @@ import java.util.concurrent.TimeUnit.SECONDS
import scala.concurrent.*
import scala.concurrent.duration.*
import java.io.IOException
import java.time.Instant
import java.time.LocalDate

object Scaladex {
case class Pagination(current: Int, pageCount: Int, totalSize: Int)
// releaseDate is always UTC zoned
case class ArtifactMetadata(
final val ScaladexUrl = "https://index.scala-lang.org"

/** Performs an HTTP GET for `url`, retrying with exponential backoff on transient failures.
  *
  * A retry is attempted when the request times out, or when it fails with a
  * `requests.RequestsException` whose message mentions `GOAWAY` (reported in the log as the
  * request being terminated). The delay starts at 1 second, doubles after every retry and is
  * capped at 60 seconds. Any other failure is propagated to the caller unchanged.
  *
  * @param url address to fetch
  * @return the response of the first attempt that did not fail with a retryable error
  */
private def asyncGetWithRetry(url: String): AsyncResponse[requests.Response] = {
  // Logs the failure, waits for the current backoff and schedules the next attempt.
  def retryAfter(reason: String, backoffSeconds: Int): AsyncResponse[requests.Response] = {
    Console.err.println(
      s"Failed to fetch artifact metadata, Scaladex request $reason, retry with backoff ${backoffSeconds}s for $url"
    )
    SECONDS.sleep(backoffSeconds)
    tryGet((backoffSeconds * 2).min(60))
  }

  def tryGet(backoffSeconds: Int): AsyncResponse[requests.Response] =
    Future { requests.get(url) }
      .recoverWith {
        case _: requests.TimeoutException =>
          retryAfter("timeout", backoffSeconds)
        // The marker was previously misspelled as "GOWAY", which is not a substring of "GOAWAY",
        // so this branch could never match. The message is wrapped in Option because
        // getMessage may return null.
        case e: requests.RequestsException if Option(e.getMessage).exists(_.contains("GOAWAY")) =>
          retryAfter("terminated", backoffSeconds)
      }

  tryGet(1)
}

/** Fetches the list of projects from the Scaladex `/api/projects` endpoint.
  *
  * Every `organization`/`repository` entry of the JSON response is converted to a [[Project]].
  */
def projects: AsyncResponse[Seq[Project]] = {
  // Shape of a single element of the JSON array returned by the endpoint.
  case class ProjectEntry(organization: String, repository: String)

  val endpoint = s"$ScaladexUrl/api/projects"
  asyncGetWithRetry(endpoint).map { response =>
    val entries = fromJson[List[ProjectEntry]](response.text())
    entries.map(entry => Project(entry.organization, entry.repository))
  }
}

/** Coordinates (group id, artifact id and version) of a single artifact. It is the element type
  * decoded from the project artifacts listing and the input used to look up artifact details.
  */
case class ProjectArtifact(groupId: String, artifactId: String, version: String)
/** Lists the artifacts Scaladex knows for `project`.
  *
  * @param project project whose `org` and `name` select the Scaladex endpoint
  * @return the artifacts decoded from the JSON body of the response
  */
def artifacts(project: Project): AsyncResponse[Seq[ProjectArtifact]] = {
  val endpoint = s"$ScaladexUrl/api/projects/${project.org}/${project.name}/artifacts"
  asyncGetWithRetry(endpoint).map(response => fromJson[Seq[ProjectArtifact]](response.text()))
}

case class Artifact(
groupId: String,
artifactId: String,
version: String,
releaseDate: java.time.OffsetDateTime
)
case class ArtifactMetadataResponse(
pagination: Pagination,
items: List[ArtifactMetadata]
)
artifactName: String,
project: String,
releaseDate: Long, // epoch-millis
licenses: Seq[String],
language: String,
platform: String
):
// Release date as a calendar date. `releaseDate` holds epoch-millis; an Instant exposes no date
// fields, so LocalDate.from(Instant) always throws DateTimeException. The instant has to be
// resolved at a zone offset first (UTC is used here).
def releaseLocalData: LocalDate =
  Instant.ofEpochMilli(releaseDate).atOffset(java.time.ZoneOffset.UTC).toLocalDate

/** Fetches the details of a single artifact from Scaladex.
  *
  * @param artifact coordinates (group id, artifact id, version) of the artifact to look up
  * @return the [[Artifact]] decoded from the JSON body of the response
  */
def artifact(artifact: ProjectArtifact): AsyncResponse[Artifact] = {
  val coordinates = s"${artifact.groupId}/${artifact.artifactId}/${artifact.version}"
  val endpoint = s"$ScaladexUrl/api/artifacts/$coordinates"
  asyncGetWithRetry(endpoint).map(response => fromJson[Artifact](response.text()))
}

/** Project summary decoded from the JSON body of the Scaladex `/api/project` response. */
case class ProjectSummary(
groupId: String,
artifacts: List[String], // List of artifacts with suffixes
version: String, // latest known versions
versions: List[String] // all published versions
)

final val ScaladexUrl = "https://index.scala-lang.org"

/** Fetches the metadata of all known versions of `groupId:artifactId` from Scaladex.
  *
  * Transient failures are retried with exponential backoff: the delay starts at 1 second, doubles
  * after every retry and is capped at 60 seconds. Retried failures are a 503 status, a request
  * timeout, and a `requests.RequestsException` whose message mentions `GOAWAY`. Any other failure
  * is propagated to the caller unchanged.
  *
  * @param groupId    group id of the artifact
  * @param artifactId artifact id, including any suffix expected by Scaladex
  * @return the decoded metadata response
  */
def artifactMetadata(
    groupId: String,
    artifactId: String
): AsyncResponse[ArtifactMetadataResponse] = {
  // Logs the failure, waits for the current backoff and schedules the next attempt.
  def retryAfter(reason: String, backoffSeconds: Int): AsyncResponse[ArtifactMetadataResponse] = {
    Console.err.println(
      s"Failed to fetch artifact metadata, Scaladex $reason, retry with backoff ${backoffSeconds}s for $groupId:$artifactId"
    )
    SECONDS.sleep(backoffSeconds)
    tryFetch((backoffSeconds * 2).min(60))
  }

  def tryFetch(backoffSeconds: Int): AsyncResponse[ArtifactMetadataResponse] =
    Future {
      val response = requests.get(
        url = s"$ScaladexUrl/api/artifacts/$groupId/$artifactId"
      )
      fromJson[ArtifactMetadataResponse](response.text())
    }.recoverWith {
      // NOTE(review): the request is issued through `requests`, not jsoup - confirm that this
      // exception type can actually be raised on this path.
      case err: org.jsoup.HttpStatusException
          if err.getStatusCode == 503 && !Thread.interrupted() =>
        retryAfter("unavailable", backoffSeconds)
      case _: requests.TimeoutException =>
        retryAfter("request timeout", backoffSeconds)
      // The marker was previously misspelled as "GOWAY", which is not a substring of "GOAWAY",
      // so this branch could never match. The message is wrapped in Option because
      // getMessage may return null.
      case e: requests.RequestsException if Option(e.getMessage).exists(_.contains("GOAWAY")) =>
        retryAfter("request terminated", backoffSeconds)
    }

  tryFetch(1)
}

/** Looks up the Scaladex summary of a project for the JVM target and the given Scala version.
  *
  * On a request timeout the call sleeps for a random delay below 10 seconds and then tries
  * again; there is no limit on the number of attempts.
  *
  * @param organization       organization the project belongs to
  * @param repository         repository name of the project
  * @param scalaBinaryVersion Scala binary version sent as the `scalaVersion` query parameter
  * @return `None` when the response is reported as empty, otherwise the decoded summary
  */
def projectSummary(
    organization: String,
    repository: String,
    scalaBinaryVersion: String
): AsyncResponse[Option[ProjectSummary]] = {
  val query = Map(
    "organization" -> organization,
    "repository" -> repository,
    "target" -> "JVM",
    "scalaVersion" -> scalaBinaryVersion
  )

  Future {
    val response = requests.get(url = s"$ScaladexUrl/api/project", params = query)
    // An empty body means the project defines no JVM modules for this Scala version.
    // NOTE(review): confirm that `contentLength` yields a numeric value; if it is the raw
    // header string, this comparison never matches.
    if response.contentLength.contains(0) then None
    else Some(fromJson[ProjectSummary](response.text()))
  }.recoverWith { case _: requests.TimeoutException =>
    Thread.sleep(scala.util.Random.nextInt(10.seconds.toMillis.toInt))
    projectSummary(organization, repository, scalaBinaryVersion)
  }
}

}
115 changes: 42 additions & 73 deletions coordinator/src/main/scala/deps.scala
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,13 @@ def loadProjects(scalaBinaryVersion: String): Seq[StarredProject] =
d.select(".list-result .row").asScala.flatMap { e =>
e.select("h4").get(0).text().takeWhile(!_.isWhitespace) match {
case s"${organization}/${repository}" =>
for
ghStars <- e.select(".stats [title=Stars]")
.asScala
.headOption
.flatMap(_.text.toIntOption)
.orElse(Some(-1))
yield
StarredProject(organization, repository)(ghStars)
for ghStars <- e
.select(".stats [title=Stars]")
.asScala
.headOption
.flatMap(_.text.toIntOption)
.orElse(Some(-1))
yield StarredProject(organization, repository)(ghStars)
case _ => None
}
}
Expand All @@ -58,72 +57,42 @@ enum CandidateProject:
case BuildSelected(project: Project, mvs: Seq[ModuleInVersion])
case class ProjectModules(project: Project, mvs: Seq[ModuleInVersion])

def loadScaladexProject(
scalaBinaryVersion: String,
releaseCutOffDate: Option[LocalDate]
)(
def loadScaladexProject(releaseCutOffDate: Option[LocalDate] = None)(
project: Project
): AsyncResponse[ProjectModules] =
): AsyncResponse[ProjectModules] = {
import util.*
val binaryVersionSuffix = "_" + scalaBinaryVersion
Scaladex
.projectSummary(project.org, project.name, scalaBinaryVersion)
.flatMap {
case None =>
Console.err.println(
s"No project summary for ${project.org}/${project.name}"
)
Future.successful(Nil)
case Some(projectSummary) =>
val releaseDates = collection.mutable.Map.empty[String, OffsetDateTime]
case class VersionRelease(version: String, releaseDate: OffsetDateTime)
for
artifactsMetadata <- Future
.traverse(projectSummary.artifacts) { artifact =>
Scaladex
.artifactMetadata(
groupId = projectSummary.groupId,
artifactId = s"${artifact}_3"
)
.map { response =>
if (response.pagination.pageCount != 1)
Console.err.println(
"Scaladex now implementes pagination! Ignoring artifact metadata from additional pages"
)
// Order versions by their release date; this is more stable for hash-based pre-releases.
// The previous approach of sorting by SemVersion was unstable and could lead to runtime errors (because the ordering of elements was not transitive).
val versions = response.items
.filter(v =>
releaseCutOffDate
.forall(_.isAfter(v.releaseDate.toLocalDate()))
)
.tapEach(v => releaseDates += v.version -> v.releaseDate)
.map(_.version)
artifact -> versions
}
}
.map(_.toMap)
orderedVersions = projectSummary.versions
.flatMap(v => releaseDates.get(v).map(VersionRelease(v, _)))
.sortBy(_.releaseDate)(using
summon[Ordering[OffsetDateTime]].reverse
)
.map(_.version)
yield for version <- orderedVersions
yield ModuleInVersion(
version,
modules = artifactsMetadata.collect {
case (module, versions) if versions.contains(version) => module
}.toSeq
)
}
.map { moduleVersions =>
val modules = moduleVersions
.filter(_.modules.nonEmpty)
.map(mvs => VersionedModules(mvs, mvs.version))
.map(_.modules)
ProjectModules(project, modules)
}
for {
  // Keep only Scala 3 JVM artifacts: the artifact id has to end with `_3` and must not carry
  // a Scala Native (`_native`) or Scala.js (`_sjs`) platform suffix.
  scala3JvmArtifacts <- Scaladex
    .artifacts(project)
    .map { allArtifacts =>
      allArtifacts.filter { candidate =>
        candidate.artifactId match {
          case s"${_}_native${_}" => false
          case s"${_}_sjs${_}"    => false
          case s"${_}_3"          => true
          case _                  => false
        }
      }
    }
  artifactsByVersion = scala3JvmArtifacts.groupBy(_.version)
  // The release date of a version is taken from the first artifact published in that version
  // (groupBy never yields an empty group, so `head` is safe here).
  // Versions released on or after the cut-off date are dropped. This used to be done with
  // Future.filter, which fails the future when the predicate does not hold - a single too-recent
  // version therefore failed the whole traverse instead of just being skipped.
  versionReleaseData <- Future
    .traverse(artifactsByVersion.toSeq) { case (version, artifacts) =>
      Scaladex
        .artifact(artifacts.head)
        .map { artifact =>
          Option.when(releaseCutOffDate.forall(_.isAfter(artifact.releaseLocalData))) {
            version -> artifact.releaseDate
          }
        }
    }
    .map(_.flatten.toMap)
  orderedVersions = versionReleaseData.toSeq
    .sortBy(-_._2) // release date (epoch-millis), newest first
    .map(_._1)
  versionModules =
    for version <- orderedVersions
    yield ModuleInVersion(
      version = version,
      modules = artifactsByVersion(version).map(_.artifactId.stripSuffix("_3"))
    )
} yield ProjectModules(project, versionModules)
}

case class VersionedModules(modules: ModuleInVersion, semVersion: SemVersion)
case class ModuleVersion(name: String, version: String, p: Project)
Expand Down Expand Up @@ -219,7 +188,7 @@ def loadDepenenecyGraph(
if customProjects.contains(p) then Future.successful(CandidateProject.BuildAll(p))
else
cachedAsync { (p: Project) =>
loadScaladexProject(scalaBinaryVersion, releaseCutOffDate)(p)
loadScaladexProject(releaseCutOffDate)(p)
.map(projectModulesFilter(patterns))
}(p).map { case ProjectModules(project, mvs) =>
CandidateProject.BuildSelected(project, mvs)
Expand Down