diff --git a/metaflow/cli.py b/metaflow/cli.py index 0bf7a46e36..592df2dcca 100644 --- a/metaflow/cli.py +++ b/metaflow/cli.py @@ -47,7 +47,7 @@ resolve_identity, write_latest_run_id, ) -from .user_configs import LocalFileInput +from .user_configs import LocalFileInput, config_options ERASE_TO_EOL = "\033[K" HIGHLIGHT = "red" @@ -845,7 +845,10 @@ def version(obj): @tracing.cli_entrypoint("cli/start") -@decorators.add_decorator_and_config_options +# NOTE: add_decorator_options should be TL because it checks to make sure +# that no option conflict with the ones below +@decorators.add_decorator_options +@config_options @click.command( cls=click.CommandCollection, sources=[cli] + plugins.get_plugin_cli(), @@ -936,7 +939,8 @@ def start( event_logger=None, monitor=None, local_info_file=None, - **deco_and_config_options + config_options=None, + **deco_options ): global echo if quiet: @@ -957,7 +961,7 @@ def start( # process all those decorators that the user added that will modify the flow based # on those configurations. It is important to do this as early as possible since it # actually modifies the flow itself - ctx.obj.flow = ctx.obj.flow._process_config_funcs(deco_and_config_options) + ctx.obj.flow = ctx.obj.flow._process_config_funcs(config_options) cli_args._set_top_kwargs(ctx.params) ctx.obj.echo = echo @@ -1015,16 +1019,7 @@ def start( ctx.obj.monitor, ) - ctx.obj.config_options = { - k: v - for k, v in deco_and_config_options.items() - if k in ctx.command.config_options - } - deco_options = { - k: v - for k, v in deco_and_config_options.items() - if k not in ctx.command.config_options - } + ctx.obj.config_options = config_options decorators._resolve_configs(ctx.obj.flow) diff --git a/metaflow/decorators.py b/metaflow/decorators.py index 7e2a0345f0..8df702ebf1 100644 --- a/metaflow/decorators.py +++ b/metaflow/decorators.py @@ -227,27 +227,13 @@ def get_top_level_options(self): # compare this to parameters.add_custom_parameters -def add_decorator_and_config_options(cmd): - config_seen = {} +def add_decorator_options(cmd): flow_cls = getattr(current_flow, "flow_cls", None) if flow_cls is None: return cmd - parameters = [p for _, p in flow_cls._get_parameters() if p.IS_FLOW_PARAMETER] - # Add configuration options - for arg in parameters[::-1]: - kwargs = arg.option_kwargs(False) - if arg.name in config_seen: - msg = ( - "Multiple configurations use the same name '%s'. Please change the " - "names of some of your configurations" % arg.name - ) - raise MetaflowException(msg) - config_seen[arg.name] = arg - cmd.params.insert(0, click.Option(("--" + arg.name,), **kwargs)) - - cmd.config_options = set(config_seen.keys()) seen = {} + existing_params = set(p.name.lower() for p in cmd.params) # Add decorator options for deco in flow_decorators(flow_cls): for option, kwargs in deco.options.items(): @@ -259,11 +245,10 @@ def add_decorator_and_config_options(cmd): % (deco.name, option, seen[option]) ) raise MetaflowInternalError(msg) - elif option in config_seen: - msg = ( - "Flow decorator '%s' uses an option '%s' which is also " - "used by a configuration. Please change the name of the " - "configuration" % (deco.name, option) + elif deco.name.lower() in existing_params: + raise MetaflowInternalError( + "Flow decorator '%s' uses an option '%s' which is a reserved " + "keyword. Please use a different option name." % (deco.name, option) ) else: kwargs["envvar"] = "METAFLOW_FLOW_%s" % option.upper() diff --git a/metaflow/runner/click_api.py b/metaflow/runner/click_api.py index 5dccfbea91..470c6b655d 100644 --- a/metaflow/runner/click_api.py +++ b/metaflow/runner/click_api.py @@ -32,7 +32,7 @@ UUIDParameterType, ) from metaflow._vendor.typeguard import TypeCheckError, check_type -from metaflow.decorators import add_decorator_and_config_options +from metaflow.decorators import add_decorator_options from metaflow.exception import MetaflowException from metaflow.includefile import FilePathClass from metaflow.parameters import JSONTypeClass, flow_context @@ -184,9 +184,11 @@ def chain(self): @classmethod def from_cli(cls, flow_file: str, cli_collection: Callable) -> Callable: flow_cls = extract_flow_class_from_file(flow_file) - flow_parameters = [p for _, p in flow_cls._get_parameters()] + flow_parameters = [ + p for _, p in flow_cls._get_parameters() if not p.IS_FLOW_PARAMETER + ] with flow_context(flow_cls) as _: - add_decorator_and_config_options(cli_collection) + add_decorator_options(cli_collection) class_dict = {"__module__": "metaflow", "_API_NAME": flow_file} command_groups = cli_collection.sources diff --git a/metaflow/runtime.py b/metaflow/runtime.py index f9535c5433..50fe7aaa45 100644 --- a/metaflow/runtime.py +++ b/metaflow/runtime.py @@ -1468,9 +1468,10 @@ def __init__(self, task): # We also pass configuration options using the kv. syntax which will cause # the configuration options to be loaded from the INFO file (or local-info-file # in the case of the local runtime) - self.top_level_options.update( - {k: ConfigInput.make_key_name(k) for k in self.task.flow._user_configs} - ) + if self.task.flow._user_configs: + self.top_level_options["config"] = [ + (k, ConfigInput.make_key_name(k)) for k in self.task.flow._user_configs + ] self.commands = ["step"] self.command_args = [self.task.step] @@ -1505,7 +1506,9 @@ def _options(mapping): for value in v: yield "--%s" % k if not isinstance(value, bool): - yield to_unicode(value) + value = value if isinstance(value, tuple) else (value,) + for vv in value: + yield to_unicode(vv) args = list(self.entrypoint) args.extend(_options(self.top_level_options)) diff --git a/metaflow/user_configs.py b/metaflow/user_configs.py index 3de376a8f1..ed255e1899 100644 --- a/metaflow/user_configs.py +++ b/metaflow/user_configs.py @@ -1,7 +1,7 @@ import json import os -from typing import Any, Callable, Dict, Optional, Union, TYPE_CHECKING +from typing import Any, Callable, Dict, List, Optional, Union, TYPE_CHECKING from metaflow import INFO_FILE from metaflow._vendor import click @@ -79,9 +79,32 @@ def __str__(self): return json.dumps(self._data) -class ConfigInput(click.ParamType): +class PathOrStr(click.ParamType): name = "ConfigInput" + def convert(self, value, param, ctx): + if value is None: + return None + + if isinstance(value, dict): + return "converted:" + json.dumps(value) + + if value.startswith("converted:"): + return value + + if os.path.isfile(value): + try: + with open(value, "r") as f: + content = f.read() + except OSError as e: + raise click.UsageError( + "Could not read configuration file '%s'" % value + ) from e + return "converted:" + content + return "converted:" + value + + +class ConfigInput: # Contains the values loaded from the INFO file. We make this a class method # so that if there are multiple configs, we just need to read the file once. # It is OK to be globally unique because this is only evoked in scenario A.2 (see @@ -92,8 +115,13 @@ class ConfigInput(click.ParamType): loaded_configs = None # type: Optional[Dict[str, Dict[str, Any]]] info_file = None # type: Optional[str] - def __init__(self, parser: Optional[Callable[[str], Dict[str, Any]]] = None): - self._parser = parser + def __init__( + self, + req_configs: List[str], + parsers: Dict[str, Callable[[str], Dict[str, Any]]], + ): + self._req_configs = req_configs + self._parsers = parsers @staticmethod def make_key_name(name: str) -> str: @@ -115,84 +143,51 @@ def get_config(cls, config_name: str) -> Optional[Dict[str, Any]]: cls.loaded_configs = all_configs return cls.loaded_configs.get(config_name, None) - # @tracefunc - def convert(self, value, param, ctx): + def process_configs(self, ctx, param, value): flow_cls = getattr(current_flow, "flow_cls", None) if flow_cls is None: - # We are not executing inside a flow (ie: not the CLI) - raise MetaflowException("Config object can only be used in a FlowSpec") - - # Click can call convert multiple times, so we need to make sure to only - # convert once. - if isinstance(value, ConfigValue): - return value - - # The value we get in to convert can be: - # - a dictionary - # - a path to a YAML or JSON file - # - the string representation of a YAML or JSON file - # In all cases, we also store the configuration in the flow object under _user_configs. - # It will *not* be stored as an artifact but is a good place to store it so we - # can access it when packaging to store it in the INFO file. The config itself - # will be stored as regular artifacts (the ConfigValue object basically) - - if isinstance(value, dict): - if self._parser: - value = self._parser(value) - flow_cls._user_configs[param.name] = value - return ConfigValue(value) - elif not isinstance(value, str): - raise MetaflowException( - "Configuration value for '%s' must be a string or a dictionary" - % param.name + # This is an error + raise MetaflowInternalError( + "Config values should be processed for a FlowSpec" ) - # Here we are sure we have a string - if value.startswith("kv."): - value = self.get_config(value[3:]) - if value is None: - raise MetaflowException( - "Could not find configuration '%s' in INFO file" % value - ) - # We also set in flow_cls as this makes it easier to access - flow_cls._user_configs[param.name] = value - return ConfigValue(value) + # First validate if we have all the required parameters + # Here value is a list of tuples. Each tuple has the name of the configuration + # and the string representation of the config (it was already read + # from a file if applicable). + missing = set(self._req_configs) - set([v[0] for v in value]) + if missing: + raise click.UsageError( + "Missing required configuration values: %s" % ", ".join(missing) + ) - elif os.path.isfile(value): - try: - with open(value, "r") as f: - content = f.read() - except OSError as e: - raise MetaflowException( - "Could not read configuration file '%s'" % value - ) from e - if self._parser: - value = self._parser(content) - else: - try: - if self._parser: - value = self._parser(content) - - value = json.loads(content) - except json.JSONDecodeError as e: - raise MetaflowException( - "Configuration file '%s' is not valid JSON" % value - ) from e - # TODO: Support YAML - flow_cls._user_configs[param.name] = value - else: - if self._parser: - value = self._parser(value) + to_return = {} + for name, val in value: + name = name.lower() + val = val[10:] # Remove the "converted:" prefix + if val.startswith("kv."): + # This means to load it from a file + read_value = self.get_config(val[3:]) + if read_value is None: + raise click.UsageError( + "Could not find configuration '%s' in INFO file" % val + ) + flow_cls._user_configs[name] = read_value + to_return[name] = ConfigValue(read_value) else: - try: - value = json.loads(value) - except json.JSONDecodeError as e: - raise MetaflowException( - "Configuration value for '%s' is not valid JSON" % param.name - ) from e - # TODO: Support YAML - flow_cls._user_configs[param.name] = value - return ConfigValue(value) + if self._parsers[name]: + read_value = self._parsers[name](val) + else: + try: + read_value = json.loads(val) + except json.JSONDecodeError as e: + raise click.UsageError( + "Configuration value for '%s' is not valid JSON" % name + ) from e + # TODO: Support YAML + flow_cls._user_configs[name] = read_value + to_return[name] = ConfigValue(read_value) + return to_return def __str__(self): return repr(self) @@ -384,9 +379,12 @@ def __init__( default=default, required=required, help=help, - type=ConfigInput(parser), + type=str, **kwargs, ) + if isinstance(kwargs.get("default", None), str): + kwargs["default"] = json.dumps(kwargs["default"]) + self.parser = parser def load_parameter(self, v): return v @@ -394,3 +392,61 @@ def load_parameter(self, v): def __getattr__(self, name): ev = DelayEvaluator(self.name, is_var_only=True) return ev.__getattr__(name) + + +def config_options(cmd): + help_strs = [] + required_names = [] + defaults = [] + config_seen = set() + parsers = {} + flow_cls = getattr(current_flow, "flow_cls", None) + if flow_cls is None: + return cmd + + parameters = [p for _, p in flow_cls._get_parameters() if p.IS_FLOW_PARAMETER] + # List all the configuration options + for arg in parameters[::-1]: + kwargs = arg.option_kwargs(False) + if arg.name.lower() in config_seen: + msg = ( + "Multiple configurations use the same name '%s'. Note that names are " + "case-insensitive. Please change the " + "names of some of your configurations" % arg.name + ) + raise MetaflowException(msg) + config_seen.add(arg.name.lower()) + if kwargs["required"]: + required_names.append(arg.name) + if kwargs.get("default") is not None: + defaults.append((arg.name.lower(), kwargs["default"])) + else: + defaults.append(None) + help_strs.append(" - %s: %s" % (arg.name.lower(), kwargs.get("help", ""))) + parsers[arg.name.lower()] = arg.parser + + print("DEFAULTS %s" % defaults) + if not config_seen: + # No configurations -- don't add anything + return cmd + + help_str = ( + "Configuration options for the flow. " + "Multiple configurations can be specified." + ) + help_str = "\n\n".join([help_str] + help_strs) + cmd.params.insert( + 0, + click.Option( + ["--config", "config_options"], + nargs=2, + multiple=True, + type=click.Tuple([click.Choice(config_seen), PathOrStr()]), + callback=ConfigInput(required_names, parsers).process_configs, + help=help_str, + envvar="METAFLOW_FLOW_CONFIG", + show_default=False, + default=defaults, + ), + ) + return cmd diff --git a/metaflow/util.py b/metaflow/util.py index 348323ee42..69f3be5db2 100644 --- a/metaflow/util.py +++ b/metaflow/util.py @@ -307,19 +307,19 @@ def dict_to_cli_options(params): # keyword in Python, so we call it 'decospecs' in click args if k == "decospecs": k = "with" - orig_k = k + if k == "config_options": + # Special handling here since we gather them all in one option but actually + # need to send them one at a time using --config kv. + for config_name in v.keys(): + yield "--config" + yield to_unicode(config_name) + yield to_unicode(ConfigInput.make_key_name(config_name)) + continue k = k.replace("_", "-") v = v if isinstance(v, (list, tuple, set)) else [v] for value in v: yield "--%s" % k if not isinstance(value, bool): - if isinstance(value, ConfigValue): - # For ConfigValues, we don't send them as is but instead pass - # the special value that will look up the config value in the - # INFO file - yield ConfigInput.make_key_name(orig_k) - continue - value = to_unicode(value) # Of the value starts with $, assume the caller wants shell variable