Skip to content

Commit

Permalink
Merge pull request #754 from nipype/enh/getitem-formatters
Browse files Browse the repository at this point in the history
ENH: Add more complete format string implementation for argstrings
  • Loading branch information
effigies authored May 29, 2024
2 parents b6db19b + fc3b31f commit 00b92ec
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 13 deletions.
26 changes: 21 additions & 5 deletions pydra/engine/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,12 +649,9 @@ def argstr_formatting(argstr, inputs, value_updates=None):
if value_updates:
inputs_dict.update(value_updates)
# getting all fields that should be formatted, i.e. {field_name}, ...
inp_fields = re.findall(r"{\w+}", argstr)
inp_fields_float = re.findall(r"{\w+:[0-9.]+f}", argstr)
inp_fields += [re.sub(":[0-9.]+f", "", el) for el in inp_fields_float]
inp_fields = parse_format_string(argstr)
val_dict = {}
for fld in inp_fields:
fld_name = fld[1:-1] # extracting the name form {field_name}
for fld_name in inp_fields:
fld_value = inputs_dict[fld_name]
fld_attr = getattr(attrs.fields(type(inputs)), fld_name)
if fld_value is attr.NOTHING or (
Expand Down Expand Up @@ -738,3 +735,22 @@ def parse_copyfile(fld: attr.Attribute, default_collation=FileSet.CopyCollation.
f"Unrecognised type for collation copyfile metadata of {fld}, {collation}"
)
return mode, collation


def parse_format_string(fmtstr):
    """Parse an argstr format string and return all keywords used in it.

    A keyword is the leading identifier of a replacement field, e.g. ``var``
    in ``{var.attr[key][0]!r:>{width}.{precision}f}``.  Keywords referenced by
    nested replacement fields inside the format spec (``width`` and
    ``precision`` in the example) are returned as well.

    Parameters
    ----------
    fmtstr : str
        Format string using ``str.format`` replacement-field syntax.

    Returns
    -------
    set of str
        Names of all keyword fields found; empty if there are none.

    Notes
    -----
    Only fields whose name starts with a letter or underscore are reported,
    so positional fields such as ``{}`` or ``{0}`` are ignored.  Escaped
    braces (``{{`` / ``}}``) are not treated specially.
    """
    identifier = r"[a-zA-Z_]\w*"
    attribute = rf"\.{identifier}"
    # An index may contain anything but "]"; braces are excluded as well so
    # that a lookup can never run across a field boundary.
    item = r"\[[^\]{}]+\]"
    # Example: var.attr[key][0].attr2 (only "var" is captured, further below)
    lookups = f"(?:{attribute}|{item})*"
    # str.format supports the !r, !s and !a conversions
    conversion = "(?:![rsa])"
    nobrace = "[^{}]*"
    # Example: {pads[hex]} (capture "pads")
    nested_field = f"{{({identifier}){nobrace}}}"
    # Example: 0{pads[hex]}x or {width}.{precision}f -- any number of nested
    # fields separated by brace-free text.  Nothing is captured here because
    # a repeated group would only keep its last match; nested keywords are
    # extracted in a second pass over the captured spec instead.
    fmtspec = f"{nobrace}(?:{{{identifier}{nobrace}}}{nobrace})*"
    # Group 1: the field keyword; group 2: the raw format spec (if any)
    full_field = f"{{({identifier}){lookups}{conversion}?(?::({fmtspec}))?}}"

    keywords = set()
    for match in re.finditer(full_field, fmtstr):
        keywords.add(match.group(1))
        spec = match.group(2)
        if spec:
            keywords.update(re.findall(nested_field, spec))
    return keywords
55 changes: 53 additions & 2 deletions pydra/engine/tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
from pathlib import Path
import random
import platform
import typing as ty
import pytest
import attrs
import cloudpickle as cp
from unittest.mock import Mock
from fileformats.generic import Directory, File
Expand All @@ -15,9 +17,10 @@
load_and_run,
position_sort,
parse_copyfile,
argstr_formatting,
parse_format_string,
)
from ...utils.hash import hash_function
from .. import helpers_file
from ..core import Workflow


Expand Down Expand Up @@ -50,7 +53,7 @@ def test_hash_file(tmpdir):
with open(outdir / "test.file", "w") as fp:
fp.write("test")
assert (
hash_function(File(outdir / "test.file")) == "37fcc546dce7e59585f3217bb4c30299"
hash_function(File(outdir / "test.file")) == "f32ab20c4a86616e32bf2504e1ac5a22"
)


Expand Down Expand Up @@ -311,3 +314,51 @@ def mock_field(copyfile):
parse_copyfile(mock_field((1, 2)))
with pytest.raises(TypeError, match="Unrecognised type for collation copyfile"):
parse_copyfile(mock_field((Mode.copy, 2)))


def test_argstr_formatting():
    """Format an argstr mixing plain, float-spec, dict-item and list-item fields."""

    @attrs.define
    class Inputs:
        a1_field: str
        b2_field: float
        c3_field: ty.Dict[str, str]
        d4_field: ty.List[str]

    template = "{a1_field} {b2_field:02f} -test {c3_field[c]} -me {d4_field[0]}"
    spec_inputs = Inputs("1", 2.0, {"c": "3"}, ["4"])

    rendered = argstr_formatting(template, spec_inputs)

    assert rendered == "1 2.000000 -test 3 -me 4"


def test_parse_format_string1():
    """A single one-letter field yields that keyword."""
    keywords = parse_format_string("{a}")
    assert keywords == {"a"}


def test_parse_format_string2():
    """A single multi-letter field yields that keyword."""
    keywords = parse_format_string("{abc}")
    assert keywords == {"abc"}


def test_parse_format_string3():
    """A field nested in the format spec is reported alongside the outer one."""
    keywords = parse_format_string("{a:{b}}")
    assert keywords == {"a", "b"}


def test_parse_format_string4():
    """An item lookup on the nested field does not hide its keyword."""
    keywords = parse_format_string("{a:{b[2]}}")
    assert keywords == {"a", "b"}


def test_parse_format_string5():
    """Chained attribute/item lookups on both fields reduce to the base keywords."""
    keywords = parse_format_string("{a.xyz[somekey].abc:{b[a][b].d[0]}}")
    assert keywords == {"a", "b"}


def test_parse_format_string6():
    """Literal spec text before the nested field and a key with a space are accepted."""
    keywords = parse_format_string("{a:05{b[a 2][b].e}}")
    assert keywords == {"a", "b"}


def test_parse_format_string7():
    """Several fields separated by literal text are all reported."""
    template = "{a1_field} {b2_field:02f} -test {c3_field[c]} -me {d4_field[0]}"
    expected = {"a1_field", "b2_field", "c3_field", "d4_field"}
    assert parse_format_string(template) == expected
12 changes: 6 additions & 6 deletions pydra/engine/tests/test_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def test_input_file_hash_1(tmp_path):
fields = [("in_file", File)]
input_spec = SpecInfo(name="Inputs", fields=fields, bases=(BaseSpec,))
inputs = make_klass(input_spec)
assert inputs(in_file=outfile).hash == "0e9306e5cae1de1b4dff1f27cca03bce"
assert inputs(in_file=outfile).hash == "02fa5f6f1bbde7f25349f54335e1adaf"


def test_input_file_hash_2(tmp_path):
Expand All @@ -154,7 +154,7 @@ def test_input_file_hash_2(tmp_path):

# checking specific hash value
hash1 = inputs(in_file=file).hash
assert hash1 == "17e4e2b4d8ce8f36bf3fd65804958dbb"
assert hash1 == "aaa50d60ed33d3a316d58edc882a34c3"

# checking if different name doesn't affect the hash
file_diffname = tmp_path / "in_file_2.txt"
Expand Down Expand Up @@ -185,7 +185,7 @@ def test_input_file_hash_2a(tmp_path):

# checking specific hash value
hash1 = inputs(in_file=file).hash
assert hash1 == "17e4e2b4d8ce8f36bf3fd65804958dbb"
assert hash1 == "aaa50d60ed33d3a316d58edc882a34c3"

# checking if different name doesn't affect the hash
file_diffname = tmp_path / "in_file_2.txt"
Expand All @@ -204,7 +204,7 @@ def test_input_file_hash_2a(tmp_path):

# checking if string is also accepted
hash4 = inputs(in_file=str(file)).hash
assert hash4 == "aee7c7ae25509fb4c92a081d58d17a67"
assert hash4 == "800af2b5b334c9e3e5c40c0e49b7ffb5"


def test_input_file_hash_3(tmp_path):
Expand Down Expand Up @@ -278,7 +278,7 @@ def test_input_file_hash_4(tmp_path):

# checking specific hash value
hash1 = inputs(in_file=[[file, 3]]).hash
assert hash1 == "11b7e9c90bc8d9dc5ccfc8d4526ba091"
assert hash1 == "0693adbfac9f675af87e900065b1de00"

# the same file, but int field changes
hash1a = inputs(in_file=[[file, 5]]).hash
Expand Down Expand Up @@ -315,7 +315,7 @@ def test_input_file_hash_5(tmp_path):

# checking specific hash value
hash1 = inputs(in_file=[{"file": file, "int": 3}]).hash
assert hash1 == "5fd53b79e55bbf62a4bb3027eb753a2c"
assert hash1 == "56e6e2c9f3bdf0cd5bd3060046dea480"

# the same file, but int field changes
hash1a = inputs(in_file=[{"file": file, "int": 5}]).hash
Expand Down

0 comments on commit 00b92ec

Please sign in to comment.