Skip to content

Commit

Permalink
Merge pull request #2712 from mashehu/type-schema
Browse files Browse the repository at this point in the history
use types for default value comparison
  • Loading branch information
mashehu authored Feb 1, 2024
2 parents 571413e + ef65f76 commit d7e81bd
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 40 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
### General

- Update pre-commit hook astral-sh/ruff-pre-commit to v0.1.15 ([#2705](https://github.com/nf-core/tools/pull/2705))
- use types for default value comparison ([#2712](https://github.com/nf-core/tools/pull/2712))
- fix changelog titles ([#2708](https://github.com/nf-core/tools/pull/2708))
- Print relative path not absolute path in logo cmd log output ([#2709](https://github.com/nf-core/tools/pull/2709))
- Update codecov/codecov-action action to v4 ([#2713](https://github.com/nf-core/tools/pull/2713))
Expand Down
47 changes: 30 additions & 17 deletions nf_core/lint/nextflow_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,33 +373,46 @@ def nextflow_config(self):
schema.no_prompts = True
schema.load_schema()
schema.get_schema_defaults() # Get default values from schema
schema.get_schema_types() # Get types from schema
self.nf_config.keys() # Params in nextflow.config
for param_name in schema.schema_defaults.keys():
param = "params." + param_name
# Convert booleans to strings if needed
schema_default = (
"true"
if str(schema.schema_defaults[param_name]) == "True"
else "false"
if str(schema.schema_defaults[param_name]) == "False"
else str(schema.schema_defaults[param_name])
)
if param in ignore_defaults:
ignored.append(f"Config default ignored: {param}")
elif param in self.nf_config.keys():
if str(self.nf_config[param]) == schema_default:
passed.append(f"Config default value correct: {param}")
else:
# Handle "number" type
if schema_default.endswith(".0") and str(self.nf_config[param]) == schema_default[:-2]:
passed.append(f"Config default value correct: {param}")
else:
config_default = None
schema_default = None
if schema.schema_types[param_name] == "boolean":
schema_default = str(schema.schema_defaults[param_name]).lower()
config_default = str(self.nf_config[param]).lower()
elif schema.schema_types[param_name] == "number":
try:
schema_default = float(schema.schema_defaults[param_name])
config_default = float(self.nf_config[param])
except ValueError:
failed.append(
f"Config default value incorrect: `{param}` is set as type `number` in nextflow_schema.json, but is not a number in `nextflow.config`."
)
elif schema.schema_types[param_name] == "integer":
try:
schema_default = int(schema.schema_defaults[param_name])
config_default = int(self.nf_config[param])
except ValueError:
failed.append(
f"Config default value incorrect: `{param}` is set as {self._wrap_quotes(schema_default)} in `nextflow_schema.json` but is {self._wrap_quotes(self.nf_config[param])} in `nextflow.config`."
f"Config default value incorrect: `{param}` is set as type `integer` in nextflow_schema.json, but is not an integer in `nextflow.config`."
)
else:
schema_default = str(schema.schema_defaults[param_name])
config_default = str(self.nf_config[param])
if config_default is not None and config_default == schema_default:
passed.append(f"Config default value correct: {param}= {schema_default}")
else:
failed.append(
f"Config default value incorrect: `{param}` is set as {self._wrap_quotes(schema_default)} in `nextflow_schema.json` but is {self._wrap_quotes(self.nf_config[param])} in `nextflow.config`."
)
else:
failed.append(
f"Default value from the Nextflow schema '{param} = {self._wrap_quotes(schema_default)}' not found in `nextflow.config`."
f"Default value from the Nextflow schema `{param} = {self._wrap_quotes(schema_default)}` not found in `nextflow.config`."
)

return {"passed": passed, "warned": warned, "failed": failed, "ignored": ignored}
6 changes: 4 additions & 2 deletions nf_core/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import os
import re
from datetime import datetime
from pathlib import Path
from typing import Union

import git
import requests
Expand Down Expand Up @@ -39,12 +41,12 @@ def list_workflows(filter_by=None, sort_by="release", as_json=False, show_archiv
return wfs.print_summary()


def get_local_wf(workflow, revision=None):
def get_local_wf(workflow: Union[str, Path], revision=None) -> Union[str, None]:
"""
Check if this workflow has a local copy and use nextflow to pull it if not
"""
# Assume nf-core if no org given
if workflow.count("/") == 0:
if str(workflow).count("/") == 0:
workflow = f"nf-core/{workflow}"

wfs = Workflows()
Expand Down
48 changes: 32 additions & 16 deletions nf_core/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import copy
import json
import logging
import os
import tempfile
import webbrowser
from pathlib import Path

import jinja2
import jsonschema
Expand All @@ -30,10 +30,11 @@ class PipelineSchema:
def __init__(self):
"""Initialise the object"""

self.schema = None
self.pipeline_dir = None
self.schema_filename = None
self.schema = {}
self.pipeline_dir = ""
self.schema_filename = ""
self.schema_defaults = {}
self.schema_types = {}
self.schema_params = {}
self.input_params = {}
self.pipeline_params = {}
Expand All @@ -48,29 +49,29 @@ def __init__(self):

def get_schema_path(self, path, local_only=False, revision=None):
"""Given a pipeline name, directory, or path, set self.schema_filename"""

path = Path(path)
# Supplied path exists - assume a local pipeline directory or schema
if os.path.exists(path):
if path.exists():
if revision is not None:
log.warning(f"Local workflow supplied, ignoring revision '{revision}'")
if os.path.isdir(path):
if path.is_dir():
self.pipeline_dir = path
self.schema_filename = os.path.join(path, "nextflow_schema.json")
self.schema_filename = path / "nextflow_schema.json"
else:
self.pipeline_dir = os.path.dirname(path)
self.pipeline_dir = path.parent
self.schema_filename = path

# Path does not exist - assume a name of a remote workflow
elif not local_only:
self.pipeline_dir = nf_core.list.get_local_wf(path, revision=revision)
self.schema_filename = os.path.join(self.pipeline_dir, "nextflow_schema.json")
self.schema_filename = Path(self.pipeline_dir, "nextflow_schema.json")

# Only looking for local paths, overwrite with None to be safe
else:
self.schema_filename = None

# Check that the schema file exists
if self.schema_filename is None or not os.path.exists(self.schema_filename):
if self.schema_filename is None or not Path(self.schema_filename).exists():
error = f"Could not find pipeline schema for '{path}': {self.schema_filename}"
log.error(error)
raise AssertionError(error)
Expand Down Expand Up @@ -106,6 +107,9 @@ def load_lint_schema(self):

def load_schema(self):
"""Load a pipeline schema from a file"""
if self.schema_filename is None:
raise AssertionError("Pipeline schema filename could not be found.")

with open(self.schema_filename) as fh:
self.schema = json.load(fh)
self.schema_defaults = {}
Expand Down Expand Up @@ -147,7 +151,7 @@ def sanitise_param_default(self, param):
param["default"] = str(param["default"])
return param

def get_schema_defaults(self):
def get_schema_defaults(self) -> None:
"""
Generate set of default input parameters from schema.
Expand All @@ -171,6 +175,16 @@ def get_schema_defaults(self):
if param["default"] is not None:
self.schema_defaults[p_key] = param["default"]

def get_schema_types(self) -> None:
"""Get a list of all parameter types in the schema"""
for name, param in self.schema.get("properties", {}).items():
if "type" in param:
self.schema_types[name] = param["type"]
for _, definition in self.schema.get("definitions", {}).items():
for name, param in definition.get("properties", {}).items():
if "type" in param:
self.schema_types[name] = param["type"]

def save_schema(self, suppress_logging=False):
"""Save a pipeline schema to a file"""
# Write results to a JSON file
Expand Down Expand Up @@ -486,7 +500,7 @@ def print_documentation(
console = rich.console.Console()
console.print("\n", Syntax(prettified_docs, format), "\n")
else:
if os.path.exists(output_fn) and not force:
if Path(output_fn).exists() and not force:
log.error(f"File '{output_fn}' exists! Please delete first, or use '--force'")
return
with open(output_fn, "w") as fh:
Expand Down Expand Up @@ -572,7 +586,7 @@ def make_skeleton_schema(self):
)
schema_template = env.get_template("nextflow_schema.json")
template_vars = {
"name": self.pipeline_manifest.get("name", os.path.dirname(self.schema_filename)).strip("'"),
"name": self.pipeline_manifest.get("name", Path(self.schema_filename).parent).strip("'"),
"description": self.pipeline_manifest.get("description", "").strip("'"),
}
self.schema = json.loads(schema_template.render(template_vars))
Expand Down Expand Up @@ -656,9 +670,11 @@ def get_wf_params(self):
if len(self.pipeline_params) > 0 and len(self.pipeline_manifest) > 0:
log.debug("Skipping get_wf_params as we already have them")
return

if self.schema_filename is None:
log.error("Cannot get workflow params without a schema file")
return
log.debug("Collecting pipeline parameter defaults\n")
config = nf_core.utils.fetch_wf_config(os.path.dirname(self.schema_filename))
config = nf_core.utils.fetch_wf_config(Path(self.schema_filename).parent)
skipped_params = []
# Pull out just the params. values
for ckey, cval in config.items():
Expand Down
73 changes: 68 additions & 5 deletions tests/lint/nextflow_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ def test_default_values_match(self):
result = lint_obj.nextflow_config()
assert len(result["failed"]) == 0
assert len(result["warned"]) == 0
assert "Config default value correct: params.max_cpus" in result["passed"]
assert "Config default value correct: params.validate_params" in result["passed"]
assert "Config default value correct: params.max_cpus" in str(result["passed"])
assert "Config default value correct: params.validate_params" in str(result["passed"])


def test_default_values_fail(self):
Expand All @@ -75,7 +75,7 @@ def test_default_values_fail(self):
nf_conf_file = Path(new_pipeline) / "nextflow.config"
with open(nf_conf_file) as f:
content = f.read()
fail_content = re.sub(r"\bmax_cpus = 16\b", "max_cpus = 0", content)
fail_content = re.sub(r"\bmax_cpus\s*=\s*16\b", "max_cpus = 0", content)
with open(nf_conf_file, "w") as f:
f.write(fail_content)
# Change the default value of max_memory in nextflow_schema.json
Expand Down Expand Up @@ -115,5 +115,68 @@ def test_default_values_ignored(self):
result = lint_obj.nextflow_config()
assert len(result["failed"]) == 0
assert len(result["ignored"]) == 1
assert "Config default value correct: params.max_cpus" not in result["passed"]
assert "Config default ignored: params.max_cpus" in result["ignored"]
assert "Config default value correct: params.max_cpu" not in str(result["passed"])
assert "Config default ignored: params.max_cpus" in str(result["ignored"])


def test_default_values_float(self):
"""Test comparing two float values."""
new_pipeline = self._make_pipeline_copy()
# Add a float value `dummy=0.0001` to the nextflow.config below `validate_params`
nf_conf_file = Path(new_pipeline) / "nextflow.config"
with open(nf_conf_file) as f:
content = f.read()
fail_content = re.sub(
r"validate_params\s*=\s*true", "params.validate_params = true\ndummy = 0.000000001", content
)
with open(nf_conf_file, "w") as f:
f.write(fail_content)
# Add a float value `dummy` to the nextflow_schema.json
nf_schema_file = Path(new_pipeline) / "nextflow_schema.json"
with open(nf_schema_file) as f:
content = f.read()
fail_content = re.sub(
r'"validate_params": {',
' "dummy": {"type": "number","default":0.000000001},\n"validate_params": {',
content,
)
with open(nf_schema_file, "w") as f:
f.write(fail_content)

lint_obj = nf_core.lint.PipelineLint(new_pipeline)
lint_obj._load_pipeline_config()
result = lint_obj.nextflow_config()
assert len(result["failed"]) == 0
assert len(result["warned"]) == 0
assert "Config default value correct: params.dummy" in str(result["passed"])


def test_default_values_float_fail(self):
"""Test comparing two float values."""
new_pipeline = self._make_pipeline_copy()
# Add a float value `dummy=0.0001` to the nextflow.config below `validate_params`
nf_conf_file = Path(new_pipeline) / "nextflow.config"
with open(nf_conf_file) as f:
content = f.read()
fail_content = re.sub(
r"validate_params\s*=\s*true", "params.validate_params = true\ndummy = 0.000000001", content
)
with open(nf_conf_file, "w") as f:
f.write(fail_content)
# Add a float value `dummy` to the nextflow_schema.json
nf_schema_file = Path(new_pipeline) / "nextflow_schema.json"
with open(nf_schema_file) as f:
content = f.read()
fail_content = re.sub(
r'"validate_params": {', ' "dummy": {"type": "float","default":0.000001},\n"validate_params": {', content
)
with open(nf_schema_file, "w") as f:
f.write(fail_content)

lint_obj = nf_core.lint.PipelineLint(new_pipeline)
lint_obj._load_pipeline_config()
result = lint_obj.nextflow_config()

assert len(result["failed"]) == 1
assert len(result["warned"]) == 0
assert "Config default value incorrect: `params.dummy" in str(result["failed"])
2 changes: 2 additions & 0 deletions tests/test_lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@ def test_sphinx_md_files(self):
)
from .lint.nextflow_config import ( # type: ignore[misc]
test_default_values_fail,
test_default_values_float,
test_default_values_float_fail,
test_default_values_ignored,
test_default_values_match,
test_nextflow_config_bad_name_fail,
Expand Down

0 comments on commit d7e81bd

Please sign in to comment.