Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
222 changes: 153 additions & 69 deletions easybuild/easyblocks/generic/cargo.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env python
##
# Copyright 2009-2025 Ghent University
#
Expand Down Expand Up @@ -32,16 +33,17 @@

import os
import re
from glob import glob

import easybuild.tools.environment as env
import easybuild.tools.systemtools as systemtools
from easybuild.tools.build_log import EasyBuildError, print_warning
from easybuild.framework.easyconfig import CUSTOM
from easybuild.framework.extensioneasyblock import ExtensionEasyBlock
from easybuild.tools.filetools import extract_file
from easybuild.tools.run import run_cmd
from easybuild.tools.build_log import EasyBuildError, print_warning
from easybuild.tools.config import build_option
from easybuild.tools.filetools import compute_checksum, mkdir, move_file, read_file, write_file, CHECKSUM_TYPE_SHA256
from easybuild.tools.filetools import CHECKSUM_TYPE_SHA256, compute_checksum, extract_file, mkdir, move_file
from easybuild.tools.filetools import read_file, write_file
from easybuild.tools.run import run_cmd
from easybuild.tools.toolchain.compiler import OPTARCH_GENERIC

CRATESIO_SOURCE = "https://crates.io/api/v1/crates"
Expand All @@ -60,7 +62,14 @@
git = "{url}"
rev = "{rev}"
replace-with = "vendored-sources"
"""

# Template for a config.toml [source] entry describing a git repository that is pinned
# to both a revision and a branch and is replaced by "vendored-sources".
# Placeholders {url}, {rev} and {branch} are filled in via str.format().
CONFIG_TOML_SOURCE_GIT_BRANCH = """
[source."{url}?rev={rev}"]
git = "{url}"
rev = "{rev}"
branch = "{branch}"
replace-with = "vendored-sources"
"""

CONFIG_TOML_SOURCE_GIT_WORKSPACE = """
Expand Down Expand Up @@ -154,12 +163,17 @@ def extra_options(extra_vars=None):
return extra_vars

@staticmethod
def crate_src_filename(pkg_name, pkg_version, _url=None, rev=None):
    """
    Crate tarball filename based on package name, version and optionally git revision

    :param pkg_name: name of the crate
    :param pkg_version: version of the crate
    :param _url: unused; accepted so that a whole crate tuple can be unpacked into this call
    :param rev: git revision of the crate sources, None if there is none
    :return: '<name>-<version>.tar.gz', or '<name>-<version>-<first 8 chars of rev>.tar.xz'
             when a revision is given
    """
    filename = [pkg_name, pkg_version]
    filename_ext = '.tar.gz'

    if rev is not None:
        # sources from a git repo
        filename.append(rev[:8])  # append short commit hash
        filename_ext = '.tar.xz'  # use a reproducible archive format

    return '-'.join(filename) + filename_ext

@staticmethod
def crate_download_filename(pkg_name, pkg_version):
Expand Down Expand Up @@ -254,27 +268,37 @@ def extract_step(self):
"""
Unpack the source files and populate them with required .cargo-checksum.json if offline
"""
vendor_dir = os.path.join(self.builddir, 'easybuild_vendor')
mkdir(vendor_dir)
self.vendor_dir = os.path.join(self.builddir, 'easybuild_vendor')
mkdir(self.vendor_dir)
# Sources from git repositories might contain multiple crates/folders in a so-called "workspace".
# If we put such a workspace into the vendor folder, cargo fails with
# "found a virtual manifest at [...]Cargo.toml instead of a package manifest".
# Hence we put those in a separate folder and only move "regular" crates into the vendor folder.
git_vendor_dir = os.path.join(self.builddir, 'easybuild_vendor_git')
mkdir(git_vendor_dir)
self.git_vendor_dir = os.path.join(self.builddir, 'easybuild_vendor_git')
mkdir(self.git_vendor_dir)

vendor_crates = {self.crate_src_filename(*crate): crate for crate in self.crates}
# Track git sources for building the cargo config and avoiding duplicated folders
git_sources = {}

for src in self.src:
# Check for git crates, `git_key` will be set to a true-ish value for those
# Check if the source is a vendored crate
try:
crate_name, _, git_repo, rev = vendor_crates[src['name']]
except (ValueError, KeyError):
crate = vendor_crates[src['name']]
except KeyError:
is_vendor_crate = False
else:
is_vendor_crate = True
crate_name = crate[0]
# Store crate for later
src['crate'] = crate

# Check for git crates, `git_key` will be set to a true-ish value for those
if not is_vendor_crate or len(crate) == 2:
git_key = None
else:
git_key = (git_repo, rev)
git_key = crate[2:]
git_repo, rev = git_key
self.log.debug("Sources of %s(%s) belong to git repo: %s rev %s",
crate_name, src['name'], git_repo, rev)
# Do a sanity check that sources for the same repo and revision are the same
Expand All @@ -295,10 +319,9 @@ def extract_step(self):
src['finalpath'] = previous_source['finalpath']
continue

is_vendor_crate = src['name'] in vendor_crates
# Extract dependency crates into vendor subdirectory, separate from sources of main package
if is_vendor_crate:
extraction_dir = git_vendor_dir if git_key else vendor_dir
extraction_dir = self.git_vendor_dir if git_key else self.vendor_dir
else:
extraction_dir = self.builddir

Expand Down Expand Up @@ -339,38 +362,86 @@ def extract_step(self):
write_file(chkfile, CARGO_CHECKSUM_JSON.format(checksum=checksum))
# Move non-workspace git crates to the vendor folder
if git_key and member_dirs is None:
src_dir = os.path.join(vendor_dir, os.path.basename(crate_dirs[0]))
src_dir = os.path.join(self.vendor_dir, os.path.basename(crate_dirs[0]))
self.log.debug('Moving crate %s without workspaces to vendor folder', crate_name)
move_file(crate_dirs[0], src_dir)

src['finalpath'] = src_dir
self._setup_offline_config(git_sources)

if self.cfg['offline']:
self.log.info("Setting vendored crates dir for offline operation")
config_toml = os.path.join(self.cargo_home, 'config.toml')
# Replace crates-io with vendored sources using build dir wide toml file in CARGO_HOME
self.log.debug("Writting config.toml entry for vendored crates from crate.io")
write_file(config_toml, CONFIG_TOML_SOURCE_VENDOR.format(vendor_dir=vendor_dir), append=True)

# Tell cargo about the vendored git sources to avoid it failing with:
# Unable to update https://github.com/[...]
# can't checkout from 'https://github.com/[...]]': you are in the offline mode (--offline)
for (git_repo, rev), src in git_sources.items():
self.log.debug("Writting config.toml entry for git repo: %s rev %s", git_repo, rev)
src_dir = src['finalpath']
if os.path.dirname(src_dir) == vendor_dir:
# Non-workspace sources are in vendor_dir
write_file(config_toml,
CONFIG_TOML_SOURCE_GIT.format(url=git_repo, rev=rev),
append=True)
else:
# Workspace sources stay in their own separate folder.
# We cannot have a `directory = "<dir>"` entry where a folder containing a workspace is inside
write_file(config_toml,
CONFIG_TOML_SOURCE_GIT_WORKSPACE.format(url=git_repo, rev=rev, workspace_dir=src_dir),
append=True)
def _setup_offline_config(self, git_sources):
    """
    Setup the configuration required for offline builds

    Does nothing unless self.cfg['offline'] is set. Otherwise writes the vendored-sources
    entry plus one entry per git source to config.toml in self.cargo_home, and sets
    CARGO_NET_OFFLINE in the environment.

    :param git_sources: dict mapping (git_repo, rev) to extracted source
    """
    if not self.cfg['offline']:
        return

    self.log.info("Setting up vendored crates for offline operation")
    self.log.debug("Writting config.toml entry for vendored crates from crate.io")
    config_toml = os.path.join(self.cargo_home, 'config.toml')
    # Replace crates-io with vendored sources using build dir wide toml file in CARGO_HOME
    # NOTE(review): no append=True here, unlike the per-repository entries below;
    # presumably this starts the file afresh - confirm against write_file
    write_file(config_toml, CONFIG_TOML_SOURCE_VENDOR.format(vendor_dir=self.vendor_dir))

    # Tell cargo about the vendored git sources to avoid it failing with:
    # Unable to update https://github.com/[...]
    # can't checkout from 'https://github.com/[...]]': you are in the offline mode (--offline)
    for (git_repo, rev), src in git_sources.items():
        crate_name = src['crate'][0]
        src_dir = src['finalpath']
        if os.path.dirname(src_dir) == self.vendor_dir:
            # Non-workspace sources are in vendor_dir
            git_branch = self._get_crate_git_repo_branch(crate_name)
            template = CONFIG_TOML_SOURCE_GIT_BRANCH if git_branch else CONFIG_TOML_SOURCE_GIT
            self.log.debug("Writing config.toml entry for git repo: %s branch %s, rev %s",
                           git_repo, git_branch, rev)
            # Templates ignore keyword arguments they have no placeholder for,
            # so passing branch is harmless for the branch-less template
            entry = template.format(url=git_repo, rev=rev, branch=git_branch)
        else:
            self.log.debug("Writing config.toml entry for git repo: %s rev %s", git_repo, rev)
            # Workspace sources stay in their own separate folder.
            # We cannot have a `directory = "<dir>"` entry where a folder containing a workspace is inside
            entry = CONFIG_TOML_SOURCE_GIT_WORKSPACE.format(url=git_repo, rev=rev, workspace_dir=src_dir)
        write_file(config_toml, entry, append=True)

    # Use environment variable since it would also be passed along to builds triggered via python packages
    env.setvar('CARGO_NET_OFFLINE', 'true')

def _get_crate_git_repo_branch(self, crate_name):
    """
    Find the dependency definition for given crate in all Cargo.toml files of sources
    Return branch target for given crate_name if any

    :param crate_name: name of the crate whose inline-table dependency entry is searched for
    :return: value of the first `branch = "..."` found in such an entry, None if there is none
    :raises EasyBuildError: if no Cargo.toml file exists below any of the searched folders
    """
    # Search all Cargo.toml files in main source and vendored crates
    search_dirs = (self.src[0]['finalpath'], self.vendor_dir, self.git_vendor_dir)
    # Flatten with a comprehension: sum() only accepts `start` as keyword since Python 3.8
    cargo_toml_files = [
        cargo_toml
        for path in search_dirs
        for cargo_toml in glob(os.path.join(path, '**', 'Cargo.toml'), recursive=True)
    ]
    if not cargo_toml_files:
        raise EasyBuildError("Cargo.toml file not found in sources")

    self.log.debug(
        f"Searching definition of crate '{crate_name}' in the following files: {', '.join(cargo_toml_files)}"
    )

    # The lookbehind keeps a crate name from matching the tail of a longer one,
    # e.g. 'syn' inside 'proc-syn = { ... }'
    git_repo_spec = re.compile(r"(?<![\w-])" + re.escape(crate_name) + r"\s*=\s*{([^}]*)}", re.M)
    git_branch_spec = re.compile(r'branch\s*=\s*"([^"]*)"', re.M)

    for cargo_toml in cargo_toml_files:
        git_repo_crate = git_repo_spec.search(read_file(cargo_toml))
        if not git_repo_crate:
            continue
        self.log.debug(f"Found specification in {cargo_toml} for crate '{crate_name}': " +
                       git_repo_crate.group())
        # Only the text between the braces of the inline table is searched for a branch
        git_branch_crate = git_branch_spec.search(git_repo_crate.group(1))
        if git_branch_crate:
            self.log.debug(f"Found git branch requirement for crate '{crate_name}': " +
                           git_branch_crate.group())
            return git_branch_crate.group(1)

    return None

def configure_step(self):
"""Empty configuration step."""
Expand Down Expand Up @@ -432,7 +503,8 @@ def install_step(self):

def generate_crate_list(sourcedir):
"""Helper for generating crate list"""
import toml
import toml # pylint: disable=import-outside-toplevel
from urllib.parse import parse_qs, urlsplit # pylint: disable=import-outside-toplevel

cargo_toml = toml.load(os.path.join(sourcedir, 'Cargo.toml'))

Expand All @@ -447,43 +519,55 @@ def generate_crate_list(sourcedir):
except KeyError:
app_name = os.path.basename(os.path.abspath(sourcedir))
print_warning('Did not find a [package] name= entry. Assuming it is the folder name: ' + app_name)
deps = cargo_lock['package']

app_in_cratesio = False
crates = []
other_crates = []
for dep in deps:
for dep in cargo_lock['package']:
name = dep['name']
version = dep['version']
if 'source' in dep:
if name == app_name:
app_in_cratesio = True # exclude app itself, needs to be first in crates list or taken from pypi
else:
if dep['source'] == 'registry+https://github.com/rust-lang/crates.io-index':
crates.append((name, version))
else:
# Lock file has #revision in the url
url, rev = dep['source'].rsplit('#', maxsplit=1)
for prefix in ('registry+', 'git+'):
if url.startswith(prefix):
url = url[len(prefix):]
# Remove branch name and revision URL parameters if present
url = re.sub(r'\?branch=\w+$', '', url)
url = re.sub(r'\?rev=%s+$' % rev, '', url)
crates.append((name, version, url, rev))
else:
try:
source_url = dep['source']
except KeyError:
other_crates.append((name, version))
continue
if name == app_name:
app_in_cratesio = True # exclude app itself, needs to be first in crates list or taken from pypi
else:
if source_url == 'registry+https://github.com/rust-lang/crates.io-index':
crates.append((name, version))
else:
# Lock file has revision and branch in the url
url = re.sub(r'^(registry|git)\+', '', source_url) # Strip prefix if present
parsed_url = urlsplit(url)
url = re.split('[#?]', url, maxsplit=1)[0] # Remove query and fragment
rev = parsed_url.fragment
if not rev:
raise ValueError("Revision not found in URL %s" % url)
qs = parse_qs(parsed_url.query)
rev_qs = qs.get('rev', [None])[0]
if rev_qs is not None and rev_qs != rev:
raise ValueError("Found different revision in query of URL "
"%s: %s, expected: %s" % (url, rev_qs, rev))
crates.append((name, version, url, rev))
return app_in_cratesio, crates, other_crates


def main():
    """
    Print the crate list for the folder given as the only command line argument

    Prints the crates without a source in Cargo.lock, followed by a `crates = [...]` listing
    when there is anything to list. Exits with status 1 unless exactly one argument is given.
    """
    import sys  # pylint: disable=import-outside-toplevel
    if len(sys.argv) != 2:
        print('Expected path to folder containing Cargo.[toml,lock]')
        sys.exit(1)
    app_in_cratesio, crates, other = generate_crate_list(sys.argv[1])
    print('Other crates (no source in Cargo.lock):', other)
    if app_in_cratesio or crates:
        # NOTE(review): the leading whitespace inside the two literals below is reproduced
        # as found; confirm the intended indentation width of the printed entries
        print('crates = [')
        if app_in_cratesio:
            print(' (name, version),')
        # Sorted so the printed list is in a stable order
        for crate_info in sorted(crates):
            print(" %s," % str(crate_info))
        print(']')


if __name__ == '__main__':
    main()