Skip to content
200 changes: 170 additions & 30 deletions easybuild/easyblocks/generic/cargo.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import tempfile
from glob import glob
from pathlib import Path
from typing import Dict, List, Union

import easybuild.tools.environment as env
import easybuild.tools.systemtools as systemtools
Expand Down Expand Up @@ -78,19 +79,136 @@
CARGO_CHECKSUM_JSON = '{{"files": {{}}, "package": "{checksum}"}}'


def get_workspace_members(crate_dir: Path):
"""Find all members of a cargo workspace in crate_dir.
def _parse_toml_list(value: str) -> List[str]:
    """Split the raw text of a TOML array into the raw text of its elements.

    String elements keep their surrounding quotes, all other elements are returned as written
    (with surrounding whitespace removed). Commas are only protected inside strings,
    so nested arrays or inline tables containing commas are split at those commas too.

    :param value: raw array text including the enclosing square brackets
    :return: list with the raw text of each element, empty for an empty array
    :raises ValueError: if value is not enclosed in '[' and ']' or a string is not terminated
    """
    if not value.startswith('[') or not value.endswith(']'):
        raise ValueError(f"'{value}' is not a TOML list")
    value = value[1:-1].strip()
    # Strings that are considered finished at the next occurrence of their start marker.
    # The triple-quoted markers must be checked before the single quote.
    simple_str_markers = ('"""', "'''", "'")
    current_value = ''
    result = []
    while value:
        for marker in simple_str_markers:
            if value.startswith(marker):
                # str.index raises ValueError for an unterminated string
                idx = value.index(marker, len(marker))
                current_value += value[:idx + len(marker)]
                value = value[idx + len(marker):].lstrip()
                break
        else:
            if value.startswith('"'):
                # Basic string: ends at the first quote that is not preceded by a backslash
                m = re.match(r'".*?(?<!\\)"', value, re.M)
                if m is None:
                    raise ValueError(f"Unterminated string in TOML list: {value}")
                current_value += m[0]
                value = value[m.end():].lstrip()
        # Not inside a string here
        if value.startswith(','):
            result.append(current_value)
            current_value = ''
            value = value[1:].lstrip()
        else:
            m = re.search('"|\'|,', value)
            if m:
                current_value += value[:m.start()].strip()
                # Keep the delimiter: the next iteration has to see the comma (to finish the element)
                # or the opening quote (to consume the whole string).
                value = value[m.start():]
            else:
                current_value += value.strip()
                break
    if current_value:
        result.append(current_value)
    return result


def _clean_line(line: str, expected_end: Union[str, None]) -> str:
    """Remove a trailing comment from a line of a TOML file and trim surrounding whitespace.

    A '#' only starts a comment when it is outside of a string.

    :param line: raw line of the file
    :param expected_end: terminator of a value that started on a previous line, or None.
                         If it is a string terminator and not contained in the line,
                         the whole line is part of that string and returned (trimmed) as-is.
                         If it is contained, the search for a comment starts right after it.
    :return: the trimmed line without the comment
    """
    if '#' not in line:
        return line.strip()
    if expected_end is not None and expected_end[0] in ("'", '"'):
        try:
            idx = line.index(expected_end) + len(expected_end)
        except ValueError:
            return line.strip()  # Ignore #-sign in multi-line string
    else:
        idx = 0
    in_str = False
    escaped = False
    while idx < len(line):
        c = line[idx]
        if in_str:
            if escaped:
                # An escape covers exactly one character, whatever that character is
                escaped = False
            elif c == '"':
                in_str = False
            elif c == '\\':
                escaped = True
        elif c == '#':
            break
        elif c == '"':
            in_str = True
        elif c == "'":
            # Literal string: no escapes, skip to the closing quote (or the end of the line)
            try:
                idx = line.index("'", idx + 1)
            except ValueError:
                idx = len(line)
        idx += 1
    return line[:idx].strip()


def _parse_toml(file_or_content: Union[Path, str]) -> Dict[str, Dict[str, str]]:
    """Minimally parse a TOML file into sections, keys and values

    Values will be the raw strings (including quotes for string-typed values).
    Values spanning multiple lines (arrays, inline tables, multi-line strings) are joined by newlines
    after trimming each line and dropping empty lines.

    :param file_or_content: path of the file to read (Path instance) or the file content (string)
    :return: dictionary mapping each section name to a dictionary of its keys and raw values
    :raises ValueError: if the content cannot be parsed,
                        which includes key-value pairs in front of the first section header
    """

    result: Dict[str, Dict[str, str]] = {}
    pending_key = None
    pending_value = None
    expected_end = None
    current_section = None
    content = read_file(file_or_content) if isinstance(file_or_content, Path) else file_or_content
    line_num = raw_line = None
    # Start marker of a value that may span multiple lines and the marker finishing it
    start_end = {
        '[': ']',
        '{': '}',
        '"""': '"""',
        "'''": "'''",
    }
    try:
        # noqa B007: line_num used in error only
        for line_num, raw_line in enumerate(content.splitlines(), start=1):  # noqa B007
            line: str = _clean_line(raw_line, expected_end)
            if not line:
                continue
            if pending_key is None and line.startswith("[") and line.endswith("]"):
                current_section = line.strip()[1:-1].strip()
                result.setdefault(current_section, {})
                continue
            if pending_key is None:
                key, val = line.split("=", 1)
                pending_key = key.strip()
                pending_value = val.strip()
                for start, end in start_end.items():
                    if pending_value.startswith(start):
                        expected_end = end
                        break
                else:
                    expected_end = None
            else:
                pending_value += '\n' + line
            # A value consisting only of the marker is the start of a multi-line string, not its end
            if expected_end is None or (pending_value != expected_end and pending_value.endswith(expected_end)):
                result[current_section][pending_key] = pending_value.strip()
                pending_key = None
                # The value is complete: don't let its end marker influence
                # the comment removal on the following line
                expected_end = None
    except Exception as e:
        raise ValueError(f'Failed to parse {file_or_content}, error {e} at line {line_num}: {raw_line}') from e
    return result


(Minimally) parse the Cargo.toml file.
def _get_workspace_members(cargo_toml: Dict[str, str]):
"""Find all members of a cargo workspace in the parsed Cargo.toml file.

Return a tuple: (has_package, workspace-members).
has_package determines if it is a virtual workspace ([workspace] and no [package])
workspace-members are all members (subfolder names) if it is a workspace, otherwise None
"""
cargo_toml = crate_dir / 'Cargo.toml'
lines = [line.strip() for line in read_file(cargo_toml).splitlines()]
# A virtual (workspace) manifest has no [package], but only a [workspace] section.
has_package = '[package]' in lines
has_package = 'package' in cargo_toml

# We are looking for this:
# [workspace]
Expand All @@ -101,30 +219,15 @@ def get_workspace_members(crate_dir: Path):
# ]

try:
start_idx = lines.index('[workspace]')
except ValueError:
workspace = cargo_toml['workspace']
except KeyError:
return has_package, None
# Find "members = [" and concatenate the value, stop at end of section or file
member_str = None
for line in lines[start_idx + 1:]:
if line.startswith('#'):
continue # Skip comments
if re.match(r'\[\w+\]', line):
break # New section
if member_str is None:
m = re.match(r'members\s+=\s+\[', line)
if m:
member_str = line[m.end():]
else:
member_str += line
# Stop if we reach the end of the list
if member_str is not None and member_str.endswith(']'):
member_str = member_str[:-1]
break
if member_str is None:
try:
member_strs = _parse_toml_list(workspace['members'])
except (KeyError, ValueError):
raise EasyBuildError('Failed to find members in %s', cargo_toml)
# Split at commas after removing possibly trailing ones and remove the quotes
members = [member.strip().strip('"') for member in member_str.rstrip(',').split(',')]
# Remove the quotes
members = [member.strip('"') for member in member_strs]
# Sanity check that we didn't pick up anything unexpected
invalid_members = [member for member in members if not re.match(r'(\w|-)+', member)]
if invalid_members:
Expand All @@ -133,6 +236,39 @@ def get_workspace_members(crate_dir: Path):
return has_package, members


def _merge_sub_crate(cargo_toml_path: Path, workspace_toml: Dict[str, str]):
    """Resolve workspace references in the Cargo.toml file

    Entries of the form 'key.workspace = true' or 'key = { workspace = true }' in the [package],
    [dependencies], [build-dependencies] and [dev-dependencies] sections are replaced by
    'key = <value>' using the value from the [workspace.package] or [workspace.dependencies] section
    of the workspace manifest. The file is rewritten in place, and left untouched if it has no such entry.

    :param cargo_toml_path: path to the Cargo.toml of the workspace member
    :param workspace_toml: parsed Cargo.toml (see _parse_toml) of the workspace
    :raises KeyError: if the workspace does not define a referenced key
    :raises ValueError: if the line defining a referenced key cannot be located in the file
    """
    # Lines such as 'authors.workspace = true' must be replaced by 'authors = <value from workspace.package>'
    content: str = read_file(cargo_toml_path)
    SUFFIX = '.workspace'  # Suffix of keys that refer to workspace values
    # TOML ignores whitespace around '=' and inside the braces, so none of the checks may rely on it
    if not re.search(r'workspace\s*=\s*true', content):
        return
    cargo_toml = _parse_toml(content)
    lines = content.splitlines()
    inline_workspace_ref = re.compile(r'\{\s*workspace\s*=\s*true\s*\}')

    def do_replacement(section, workspace_section):
        """Replace workspace references of the section by the values of the workspace section"""
        if not section or not workspace_section:
            return

        for key, value in section.items():
            has_suffix = key.endswith(SUFFIX)
            if not ((has_suffix and value == 'true') or inline_workspace_ref.fullmatch(value)):
                continue
            real_key = key[:-len(SUFFIX)] if has_suffix else key
            new_value = workspace_section[real_key]
            # First line assigning the key to the referencing value.
            # Lines replaced earlier no longer contain that value and hence are not matched again.
            key_assignment = re.compile(r'\s*' + re.escape(key) + r'\s*=')
            idx = next((i for i, line in enumerate(lines) if key_assignment.match(line) and value in line), None)
            if idx is None:
                raise ValueError(f"Failed to find line for key '{key}' while merging {cargo_toml_path}")
            lines[idx] = f'{real_key} = {new_value}'

    do_replacement(cargo_toml.get('package'), workspace_toml.get('workspace.package'))
    do_replacement(cargo_toml.get('dependencies'), workspace_toml.get('workspace.dependencies'))
    do_replacement(cargo_toml.get('build-dependencies'), workspace_toml.get('workspace.dependencies'))
    do_replacement(cargo_toml.get('dev-dependencies'), workspace_toml.get('workspace.dependencies'))

    write_file(cargo_toml_path, '\n'.join(lines))


def get_checksum(src, log):
"""Get the checksum from an extracted source"""
checksum = src['checksum']
Expand Down Expand Up @@ -354,7 +490,8 @@ def _setup_offline_config(self, git_sources):
tmp_dir = Path(tempfile.mkdtemp(dir=self.builddir, prefix='tmp_crate_'))
# Add checksum file for each crate such that it is recognized by cargo.
# Glob to catch multiple folders in a source archive.
for crate_dir in (p.parent for p in Path(self.vendor_dir).glob('*/Cargo.toml')):
for cargo_toml in Path(self.vendor_dir).glob('*/Cargo.toml'):
crate_dir = cargo_toml.parent
src = path_to_source.get(str(crate_dir))
if src:
try:
Expand All @@ -372,7 +509,8 @@ def _setup_offline_config(self, git_sources):
# otherwise (Only "[workspace]" section and no "[package]" section)
# we have to remove the top-level folder or cargo fails with:
# "found a virtual manifest at [...]Cargo.toml instead of a package manifest"
has_package, members = get_workspace_members(crate_dir)
parsed_toml = _parse_toml(cargo_toml)
has_package, members = _get_workspace_members(parsed_toml)
if members:
self.log.info(f'Found workspace in {crate_dir}. Members: ' + ', '.join(members))
if not any((crate_dir / crate).is_dir() for crate in members):
Expand All @@ -397,6 +535,8 @@ def _setup_offline_config(self, git_sources):
# Use copy_dir to resolve symlinks that might point to the parent folder
copy_dir(tmp_crate_dir / member, target_path, symlinks=False)
cargo_pkg_dirs.append(target_path)
self.log.info(f'Resolving workspace values for crate {member}')
_merge_sub_crate(target_path / 'Cargo.toml', parsed_toml)
if has_package:
# Remove the copied crate folders
for member in members:
Expand Down
Loading