Skip to content

Commit ff546d6

Browse files
edmundmillerclaude
andcommitted
refactor: Improve version snapshot content validation with enhanced regex and error handling
- Extract version checking logic into dedicated helper functions for better modularity - Implement more precise regex patterns with proper word boundaries to avoid false positives - Optimize performance by reducing string conversions from multiple to single per test - Add comprehensive test coverage for SHA hashes, mixed scenarios, and edge cases - Enhance error messages with clearer guidance and examples for developers - Improve code maintainability and readability through separation of concerns 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <[email protected]>
1 parent a35f9bc commit ff546d6

File tree

2 files changed

+164
-26
lines changed

2 files changed

+164
-26
lines changed

nf_core/modules/lint/module_tests.py

Lines changed: 65 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,59 @@
1515
log = logging.getLogger(__name__)
1616

1717

18+
def _check_version_content_format(snap_content, test_name, snap_file):
19+
"""
20+
Check if version content uses actual YAML data vs hash format.
21+
22+
Args:
23+
snap_content: Parsed JSON snapshot content
24+
test_name: Name of the test being checked
25+
snap_file: Path to snapshot file (for error reporting)
26+
27+
Returns:
28+
Tuple for passed test if valid, None if invalid or no version data found
29+
"""
30+
# Check if this test contains version data and if it's in hash format
31+
if _contains_version_hash(snap_content[test_name]):
32+
return None # Invalid - contains hash format
33+
34+
# Valid - either contains actual content or no version hash detected
35+
return (
36+
"test_snap_version_content",
37+
"version information contains actual content instead of hash",
38+
snap_file,
39+
)
40+
41+
42+
def _contains_version_hash(test_content):
43+
"""
44+
Check if test content contains version information in hash format.
45+
46+
Uses precise regex patterns to detect version hash formats while avoiding
47+
false positives from similar strings.
48+
49+
Args:
50+
test_content: Content of a single test from snapshot
51+
52+
Returns:
53+
bool: True if hash format detected, False otherwise
54+
"""
55+
# More precise regex patterns with proper boundaries
56+
version_hash_patterns = [
57+
r"\bversions\.yml:md5,[a-f0-9]{32}\b", # Exact MD5 format (32 hex chars)
58+
r"\bversions\.yml:sha[0-9]*,[a-f0-9]+\b", # SHA format with variable length
59+
]
60+
61+
# Convert to string only once and search efficiently
62+
content_str = str(test_content)
63+
64+
for pattern in version_hash_patterns:
65+
if re.search(pattern, content_str):
66+
return True
67+
68+
return False
69+
70+
1871
def module_tests(_, module: NFCoreComponent, allow_missing: bool = False):
1972
"""
2073
Lint the tests of a module in ``nf-core/modules``
@@ -170,36 +223,22 @@ def module_tests(_, module: NFCoreComponent, allow_missing: bool = False):
170223
snap_file,
171224
)
172225
)
173-
# Check if version content is actual content vs MD5 hash
226+
# Check if version content is actual content vs MD5/SHA hash
174227
# Related to: https://github.com/nf-core/modules/issues/6505
175228
# Ensures version snapshots contain actual content instead of hash values
176-
version_content_valid = True
177-
version_hash_patterns = [
178-
r"versions\.yml:md5,[\da-f]+", # MD5 hash pattern
179-
r"versions\.yml:sha[\d]*,[\da-f]+", # SHA hash pattern
180-
]
181-
182-
for pattern in version_hash_patterns:
183-
if re.search(pattern, str(snap_content[test_name])):
184-
version_content_valid = False
185-
break
186-
187-
if version_content_valid:
188-
module.passed.append(
189-
(
190-
"test_snap_version_content",
191-
"version information contains actual content instead of hash",
192-
snap_file,
193-
)
194-
)
229+
version_check_result = _check_version_content_format(snap_content, test_name, snap_file)
230+
if version_check_result:
231+
module.passed.append(version_check_result)
195232
else:
196-
module.failed.append(
197-
(
198-
"test_snap_version_content",
199-
"version information should contain actual content, not MD5/SHA hash",
200-
snap_file,
233+
# Only add failure if we found hash patterns
234+
if _contains_version_hash(snap_content[test_name]):
235+
module.failed.append(
236+
(
237+
"test_snap_version_content",
238+
"Version information should contain actual YAML content (e.g., {'tool': {'version': '1.0'}}), not hash format like 'versions.yml:md5,hash'",
239+
snap_file,
240+
)
201241
)
202-
)
203242
else:
204243
module.failed.append(
205244
(

tests/modules/lint/test_module_tests.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,3 +286,102 @@ def test_modules_version_snapshot_content_valid(self):
286286
# reset the file
287287
with open(snap_file, "w") as fh:
288288
fh.write(content)
289+
290+
@pytest.mark.issue("https://github.com/nf-core/modules/issues/6505")
291+
def test_modules_version_snapshot_content_sha_hash(self):
292+
"""Test linting a nf-test module with version information as SHA hash, which should fail.
293+
294+
Related to: https://github.com/nf-core/modules/issues/6505
295+
Fixed in: https://github.com/nf-core/tools/pull/3676
296+
"""
297+
snap_file = self.bpipe_test_module_path / "tests" / "main.nf.test.snap"
298+
snap = json.load(snap_file.open())
299+
content = snap_file.read_text()
300+
301+
# Add a version entry with SHA hash format (should be flagged)
302+
snap["my test"]["content"][0]["versions"] = (
303+
"versions.yml:sha256,e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
304+
)
305+
306+
with open(snap_file, "w") as fh:
307+
json.dump(snap, fh)
308+
309+
module_lint = nf_core.modules.lint.ModuleLint(directory=self.nfcore_modules)
310+
module_lint.lint(print_results=False, module="bpipe/test")
311+
312+
# Should fail because version is using SHA hash instead of actual content
313+
version_content_failures = [x for x in module_lint.failed if x.lint_test == "test_snap_version_content"]
314+
assert len(version_content_failures) == 1, (
315+
f"Expected 1 test_snap_version_content failure, got {len(version_content_failures)}"
316+
)
317+
318+
# reset the file
319+
with open(snap_file, "w") as fh:
320+
fh.write(content)
321+
322+
@pytest.mark.issue("https://github.com/nf-core/modules/issues/6505")
323+
def test_modules_version_snapshot_content_mixed_scenario(self):
324+
"""Test linting with mixed version content - some valid, some hash format.
325+
326+
Related to: https://github.com/nf-core/modules/issues/6505
327+
Fixed in: https://github.com/nf-core/tools/pull/3676
328+
"""
329+
snap_file = self.bpipe_test_module_path / "tests" / "main.nf.test.snap"
330+
snap = json.load(snap_file.open())
331+
content = snap_file.read_text()
332+
333+
# Create a scenario with multiple tests - one with hash, one with valid content
334+
snap["test_with_hash"] = {"content": [{"versions": "versions.yml:md5,949da9c6297b613b50e24c421576f3f1"}]}
335+
snap["test_with_valid_content"] = {"content": [{"versions": {"BPIPE": {"bpipe": "0.9.11"}}}]}
336+
337+
with open(snap_file, "w") as fh:
338+
json.dump(snap, fh)
339+
340+
module_lint = nf_core.modules.lint.ModuleLint(directory=self.nfcore_modules)
341+
module_lint.lint(print_results=False, module="bpipe/test")
342+
343+
# Should have failure for the hash test
344+
version_content_failures = [x for x in module_lint.failed if x.lint_test == "test_snap_version_content"]
345+
assert len(version_content_failures) >= 1, "Expected at least 1 failure for hash format"
346+
347+
# Should have pass for the valid content test
348+
version_content_passed = [
349+
x
350+
for x in module_lint.passed
351+
if (hasattr(x, "lint_test") and x.lint_test == "test_snap_version_content")
352+
or (isinstance(x, tuple) and len(x) > 0 and x[0] == "test_snap_version_content")
353+
]
354+
assert len(version_content_passed) >= 1, "Expected at least 1 pass for valid content"
355+
356+
# reset the file
357+
with open(snap_file, "w") as fh:
358+
fh.write(content)
359+
360+
@pytest.mark.issue("https://github.com/nf-core/modules/issues/6505")
361+
def test_modules_version_snapshot_no_version_content(self):
362+
"""Test linting when no version information is present - should not trigger version content check.
363+
364+
Related to: https://github.com/nf-core/modules/issues/6505
365+
Fixed in: https://github.com/nf-core/tools/pull/3676
366+
"""
367+
snap_file = self.bpipe_test_module_path / "tests" / "main.nf.test.snap"
368+
snap = json.load(snap_file.open())
369+
content = snap_file.read_text()
370+
371+
# Remove version information entirely
372+
if "content" in snap["my test"] and snap["my test"]["content"]:
373+
snap["my test"]["content"][0].pop("versions", None)
374+
375+
with open(snap_file, "w") as fh:
376+
json.dump(snap, fh)
377+
378+
module_lint = nf_core.modules.lint.ModuleLint(directory=self.nfcore_modules)
379+
module_lint.lint(print_results=False, module="bpipe/test")
380+
381+
# Should not have version content check failures when no version data present
382+
version_content_failures = [x for x in module_lint.failed if x.lint_test == "test_snap_version_content"]
383+
assert len(version_content_failures) == 0, "Should not have version content failures when no versions present"
384+
385+
# reset the file
386+
with open(snap_file, "w") as fh:
387+
fh.write(content)

0 commit comments

Comments
 (0)