From 91e60dfac4c6985e539c0526531889ba6696d013 Mon Sep 17 00:00:00 2001 From: Abishek Sethuraman Date: Mon, 13 Oct 2025 04:29:31 -0700 Subject: [PATCH 1/3] Allow aliases for run_opt (#1141) Summary: Lets allow aliases for a runopt. This will give downstream users to have multiple ways of accessing the same runopt. * Introduce new class for `runopt.alias` which is used to expand on adding aliases to a runopt. * Add a new dict to maintain alias to key values that can be used by `opt.get(name)` * Modify add() to accept list as well, build out the aliases list and modify the previously created dict to fill in alias to primary_key values. * Modify resolve() to check if a different alias is already used in cfg i.e if the "jobPriority" and "job_priority" are aliases for the same one, we don't allow for both to be present in the cfg. * Modify get to look at the alias to primary_key dict as well. Reviewed By: kiukchung Differential Revision: D84157870 --- torchx/specs/api.py | 74 ++++++++++++++++++++++++++++++++--- torchx/specs/test/api_test.py | 43 ++++++++++++++++++++ 2 files changed, 111 insertions(+), 6 deletions(-) diff --git a/torchx/specs/api.py b/torchx/specs/api.py index 5e4afa188..999905e73 100644 --- a/torchx/specs/api.py +++ b/torchx/specs/api.py @@ -891,10 +891,14 @@ class runopt: Represents the metadata about the specific run option """ + class alias(str): + pass + default: CfgVal opt_type: Type[CfgVal] is_required: bool help: str + aliases: list[alias] | None = None @property def is_type_list_of_str(self) -> bool: @@ -986,6 +990,7 @@ class runopts: def __init__(self) -> None: self._opts: Dict[str, runopt] = {} + self._alias_to_key: dict[runopt.alias, str] = {} def __iter__(self) -> Iterator[Tuple[str, runopt]]: return self._opts.items().__iter__() @@ -1013,9 +1018,16 @@ def is_type(obj: CfgVal, tp: Type[CfgVal]) -> bool: def get(self, name: str) -> Optional[runopt]: """ - Returns option if any was registered, or None otherwise + Returns option if any was registered, or None otherwise. + First searches for the option by ``name``, then falls-back to matching ``name`` with any + registered aliases. + """ - return self._opts.get(name, None) + if name in self._opts: + return self._opts[name] + if name in self._alias_to_key: + return self._opts[self._alias_to_key[name]] + return None def resolve(self, cfg: Mapping[str, CfgVal]) -> Dict[str, CfgVal]: """ @@ -1030,6 +1042,24 @@ def resolve(self, cfg: Mapping[str, CfgVal]) -> Dict[str, CfgVal]: for cfg_key, runopt in self._opts.items(): val = resolved_cfg.get(cfg_key) + resolved_name = None + aliases = runopt.aliases or [] + if val is None: + for alias in aliases: + val = resolved_cfg.get(alias) + if alias in cfg or val is not None: + resolved_name = alias + break + else: + resolved_name = cfg_key + for alias in aliases: + duplicate_val = resolved_cfg.get(alias) + if alias in cfg or duplicate_val is not None: + raise InvalidRunConfigException( + f"Duplicate opt name. runopt: `{resolved_name}``, is an alias of runopt: `{alias}`", + resolved_name, + cfg, + ) # check required opt if runopt.is_required and val is None: @@ -1049,7 +1079,7 @@ def resolve(self, cfg: Mapping[str, CfgVal]) -> Dict[str, CfgVal]: ) # not required and not set, set to default - if val is None: + if val is None and resolved_name is None: resolved_cfg[cfg_key] = runopt.default return resolved_cfg @@ -1142,9 +1172,38 @@ def cfg_from_json_repr(self, json_repr: str) -> Dict[str, CfgVal]: cfg[key] = val return cfg + def _get_primary_key_and_aliases( + self, + cfg_key: list[str] | str, + ) -> tuple[str, list[runopt.alias]]: + """ + Returns the primary key and aliases for the given cfg_key. + """ + if isinstance(cfg_key, str): + return cfg_key, [] + + if len(cfg_key) == 0: + raise ValueError("cfg_key must be a non-empty list") + primary_key = None + aliases = list[runopt.alias]() + for name in cfg_key: + if isinstance(name, runopt.alias): + aliases.append(name) + else: + if primary_key is not None: + raise ValueError( + f" Given more than one primary key: {primary_key}, {name}. Please use runopt.alias type for aliases. " + ) + primary_key = name + if primary_key is None or primary_key == "": + raise ValueError( + "Missing cfg_key. Please provide one other than the aliases." + ) + return primary_key, aliases + def add( self, - cfg_key: str, + cfg_key: str | list[str], type_: Type[CfgVal], help: str, default: CfgVal = None, @@ -1155,6 +1214,7 @@ def add( value (if any). If the ``default`` is not specified then this option is a required option. """ + primary_key, aliases = self._get_primary_key_and_aliases(cfg_key) if required and default is not None: raise ValueError( f"Required option: {cfg_key} must not specify default value. Given: {default}" @@ -1165,8 +1225,10 @@ def add( f"Option: {cfg_key}, must be of type: {type_}." f" Given: {default} ({type(default).__name__})" ) - - self._opts[cfg_key] = runopt(default, type_, required, help) + opt = runopt(default, type_, required, help, aliases) + for alias in aliases: + self._alias_to_key[alias] = primary_key + self._opts[primary_key] = opt def update(self, other: "runopts") -> None: self._opts.update(other._opts) diff --git a/torchx/specs/test/api_test.py b/torchx/specs/test/api_test.py index 6bbacd5ee..9f7f8aa1b 100644 --- a/torchx/specs/test/api_test.py +++ b/torchx/specs/test/api_test.py @@ -578,6 +578,49 @@ def test_runopts_add(self) -> None: # this print is intentional (demonstrates the intended usecase) print(opts) + def test_runopts_add_with_aliases(self) -> None: + opts = runopts() + opts.add( + ["job_priority", runopt.alias("jobPriority")], + type_=str, + help="priority for the job", + ) + self.assertEqual(1, len(opts._opts)) + self.assertIsNotNone(opts.get("job_priority")) + self.assertIsNotNone(opts.get("jobPriority")) + + def test_runopts_resolve_with_aliases(self) -> None: + opts = runopts() + opts.add( + ["job_priority", runopt.alias("jobPriority")], + type_=str, + help="priority for the job", + ) + opts.resolve({"job_priority": "high"}) + opts.resolve({"jobPriority": "low"}) + with self.assertRaises(InvalidRunConfigException): + opts.resolve({"job_priority": "high", "jobPriority": "low"}) + + def test_runopts_resolve_with_none_valued_aliases(self) -> None: + opts = runopts() + opts.add( + ["job_priority", runopt.alias("jobPriority")], + type_=str, + help="priority for the job", + ) + opts.add( + ["modelTypeName", runopt.alias("model_type_name")], + type_=Union[str, None], + help="ML Hub Model Type to attribute resource utilization for job", + ) + resolved_opts = opts.resolve({"model_type_name": None, "jobPriority": "low"}) + self.assertEqual(resolved_opts.get("model_type_name"), None) + self.assertEqual(resolved_opts.get("jobPriority"), "low") + self.assertEqual(resolved_opts, {"model_type_name": None, "jobPriority": "low"}) + + with self.assertRaises(InvalidRunConfigException): + opts.resolve({"model_type_name": None, "modelTypeName": "low"}) + def get_runopts(self) -> runopts: opts = runopts() opts.add("run_as", type_=str, help="run as user", required=True) From d2e7db75bb21ea566fb0b85a15f7206d941bc21a Mon Sep 17 00:00:00 2001 From: Abishek Sethuraman Date: Mon, 13 Oct 2025 04:29:31 -0700 Subject: [PATCH 2/3] Add deprecated_aliases to runopt and add warning (#1142) Summary: Similar to `runopt.alias` lets introduce and use `runopt.deprecated`. This will warn the user with a`UserWarning` when the user uses that specific name and suggests the primary one instead. Reviewed By: kiukchung Differential Revision: D84180061 --- torchx/specs/api.py | 45 ++++++++++++++++++++++++++++++----- torchx/specs/test/api_test.py | 27 +++++++++++++++++++++ 2 files changed, 66 insertions(+), 6 deletions(-) diff --git a/torchx/specs/api.py b/torchx/specs/api.py index 999905e73..c28c1a9c2 100644 --- a/torchx/specs/api.py +++ b/torchx/specs/api.py @@ -15,6 +15,7 @@ import pathlib import re import typing +import warnings from dataclasses import asdict, dataclass, field from datetime import datetime from enum import Enum @@ -894,11 +895,15 @@ class runopt: class alias(str): pass + class deprecated(str): + pass + default: CfgVal opt_type: Type[CfgVal] is_required: bool help: str aliases: list[alias] | None = None + deprecated_aliases: list[deprecated] | None = None @property def is_type_list_of_str(self) -> bool: @@ -990,7 +995,7 @@ class runopts: def __init__(self) -> None: self._opts: Dict[str, runopt] = {} - self._alias_to_key: dict[runopt.alias, str] = {} + self._alias_to_key: dict[str, str] = {} def __iter__(self) -> Iterator[Tuple[str, runopt]]: return self._opts.items().__iter__() @@ -1044,12 +1049,24 @@ def resolve(self, cfg: Mapping[str, CfgVal]) -> Dict[str, CfgVal]: val = resolved_cfg.get(cfg_key) resolved_name = None aliases = runopt.aliases or [] + deprecated_aliases = runopt.deprecated_aliases or [] if val is None: for alias in aliases: val = resolved_cfg.get(alias) if alias in cfg or val is not None: resolved_name = alias break + for alias in deprecated_aliases: + val = resolved_cfg.get(alias) + if val is not None: + resolved_name = alias + use_instead = self._alias_to_key.get(alias) + warnings.warn( + f"Run option `{alias}` is deprecated, use `{use_instead}` instead", + UserWarning, + stacklevel=2, + ) + break else: resolved_name = cfg_key for alias in aliases: @@ -1175,20 +1192,32 @@ def cfg_from_json_repr(self, json_repr: str) -> Dict[str, CfgVal]: def _get_primary_key_and_aliases( self, cfg_key: list[str] | str, - ) -> tuple[str, list[runopt.alias]]: + ) -> tuple[str, list[runopt.alias], list[runopt.deprecated]]: """ Returns the primary key and aliases for the given cfg_key. """ if isinstance(cfg_key, str): - return cfg_key, [] + return cfg_key, [], [] if len(cfg_key) == 0: raise ValueError("cfg_key must be a non-empty list") + + if isinstance(cfg_key[0], runopt.alias) or isinstance( + cfg_key[0], runopt.deprecated + ): + warnings.warn( + "The main name of the run option should be the head of the list.", + UserWarning, + stacklevel=2, + ) primary_key = None aliases = list[runopt.alias]() + deprecated_aliases = list[runopt.deprecated]() for name in cfg_key: if isinstance(name, runopt.alias): aliases.append(name) + elif isinstance(name, runopt.deprecated): + deprecated_aliases.append(name) else: if primary_key is not None: raise ValueError( @@ -1199,7 +1228,7 @@ def _get_primary_key_and_aliases( raise ValueError( "Missing cfg_key. Please provide one other than the aliases." ) - return primary_key, aliases + return primary_key, aliases, deprecated_aliases def add( self, @@ -1214,7 +1243,9 @@ def add( value (if any). If the ``default`` is not specified then this option is a required option. """ - primary_key, aliases = self._get_primary_key_and_aliases(cfg_key) + primary_key, aliases, deprecated_aliases = self._get_primary_key_and_aliases( + cfg_key + ) if required and default is not None: raise ValueError( f"Required option: {cfg_key} must not specify default value. Given: {default}" @@ -1225,9 +1256,11 @@ def add( f"Option: {cfg_key}, must be of type: {type_}." f" Given: {default} ({type(default).__name__})" ) - opt = runopt(default, type_, required, help, aliases) + opt = runopt(default, type_, required, help, aliases, deprecated_aliases) for alias in aliases: self._alias_to_key[alias] = primary_key + for deprecated_alias in deprecated_aliases: + self._alias_to_key[deprecated_alias] = primary_key self._opts[primary_key] = opt def update(self, other: "runopts") -> None: diff --git a/torchx/specs/test/api_test.py b/torchx/specs/test/api_test.py index 9f7f8aa1b..3cdbadea5 100644 --- a/torchx/specs/test/api_test.py +++ b/torchx/specs/test/api_test.py @@ -13,6 +13,7 @@ import tempfile import time import unittest +import warnings from dataclasses import asdict from pathlib import Path from typing import Dict, List, Mapping, Tuple, Union @@ -621,6 +622,32 @@ def test_runopts_resolve_with_none_valued_aliases(self) -> None: with self.assertRaises(InvalidRunConfigException): opts.resolve({"model_type_name": None, "modelTypeName": "low"}) + def test_runopts_add_with_deprecated_aliases(self) -> None: + opts = runopts() + with warnings.catch_warnings(record=True) as w: + opts.add( + [runopt.deprecated("jobPriority"), "job_priority"], + type_=str, + help="run as user", + ) + self.assertEqual(len(w), 1) + self.assertEqual(w[0].category, UserWarning) + self.assertEqual( + str(w[0].message), + "The main name of the run option should be the head of the list.", + ) + + opts.resolve({"job_priority": "high"}) + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + opts.resolve({"jobPriority": "high"}) + self.assertEqual(len(w), 1) + self.assertEqual(w[0].category, UserWarning) + self.assertEqual( + str(w[0].message), + "Run option `jobPriority` is deprecated, use `job_priority` instead", + ) + def get_runopts(self) -> runopts: opts = runopts() opts.add("run_as", type_=str, help="run as user", required=True) From f7df09b35e203f5763838d4809e8badea734ee4c Mon Sep 17 00:00:00 2001 From: Abishek Sethuraman Date: Mon, 13 Oct 2025 04:29:31 -0700 Subject: [PATCH 3/3] Use auto aliasing for cases for the runopt (#1143) Summary: Introduces enum for auto aliasing based on casing for the runopt nam ``` class AutoAlias(IntEnum): snake_case = 0x1 SNAKE_CASE = 0x2 camelCase = 0x4 ``` So user can extend name to be used as ``` opts.add( ["job_priority", runopt.AutoAlias.camelCase], type_=str, help="run as user", ) opts.add( [ "model_type_name", runopt.AutoAlias.camelCase | runopt.AutoAlias.SNAKE_CASE, ], type_=str, help="run as user", ) ``` This should automatically produce additional aliases of `jobPriority` to `job_priority` and produce `modelTypeName` and `MODEL_TYPE_NAME` for `model_type_name` Reviewed By: kiukchung Differential Revision: D84192560 --- torchx/specs/api.py | 68 +++++++++++++++++++++++++++++------ torchx/specs/test/api_test.py | 22 ++++++++++++ 2 files changed, 79 insertions(+), 11 deletions(-) diff --git a/torchx/specs/api.py b/torchx/specs/api.py index c28c1a9c2..14e919f83 100644 --- a/torchx/specs/api.py +++ b/torchx/specs/api.py @@ -18,7 +18,7 @@ import warnings from dataclasses import asdict, dataclass, field from datetime import datetime -from enum import Enum +from enum import Enum, IntEnum from json import JSONDecodeError from string import Template from typing import ( @@ -892,6 +892,30 @@ class runopt: Represents the metadata about the specific run option """ + class AutoAlias(IntEnum): + snake_case = 0x1 + SNAKE_CASE = 0x2 + camelCase = 0x4 + + @staticmethod + def convert_to_camel_case(alias: str) -> str: + words = re.split(r"[_\-\s]+|(?<=[a-z])(?=[A-Z])", alias) + words = [w for w in words if w] # Remove empty strings + if not words: + return "" + return words[0].lower() + "".join(w.capitalize() for w in words[1:]) + + @staticmethod + def convert_to_snake_case(alias: str) -> str: + alias = re.sub(r"[-\s]+", "_", alias) + alias = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", alias) + alias = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", alias) + return alias.lower() + + @staticmethod + def convert_to_const_case(alias: str) -> str: + return runopt.AutoAlias.convert_to_snake_case(alias).upper() + class alias(str): pass @@ -902,8 +926,8 @@ class deprecated(str): opt_type: Type[CfgVal] is_required: bool help: str - aliases: list[alias] | None = None - deprecated_aliases: list[deprecated] | None = None + aliases: set[alias] | None = None + deprecated_aliases: set[deprecated] | None = None @property def is_type_list_of_str(self) -> bool: @@ -1189,15 +1213,28 @@ def cfg_from_json_repr(self, json_repr: str) -> Dict[str, CfgVal]: cfg[key] = val return cfg + def _generate_aliases( + self, auto_alias: int, aliases: set[str] + ) -> set[runopt.alias]: + generated_aliases = set() + for alias in aliases: + if auto_alias & runopt.AutoAlias.camelCase: + generated_aliases.add(runopt.AutoAlias.convert_to_camel_case(alias)) + if auto_alias & runopt.AutoAlias.snake_case: + generated_aliases.add(runopt.AutoAlias.convert_to_snake_case(alias)) + if auto_alias & runopt.AutoAlias.SNAKE_CASE: + generated_aliases.add(runopt.AutoAlias.convert_to_const_case(alias)) + return generated_aliases + def _get_primary_key_and_aliases( self, - cfg_key: list[str] | str, - ) -> tuple[str, list[runopt.alias], list[runopt.deprecated]]: + cfg_key: list[str | int] | str, + ) -> tuple[str, set[runopt.alias], set[runopt.deprecated]]: """ Returns the primary key and aliases for the given cfg_key. """ if isinstance(cfg_key, str): - return cfg_key, [], [] + return cfg_key, set(), set() if len(cfg_key) == 0: raise ValueError("cfg_key must be a non-empty list") @@ -1211,13 +1248,16 @@ def _get_primary_key_and_aliases( stacklevel=2, ) primary_key = None - aliases = list[runopt.alias]() - deprecated_aliases = list[runopt.deprecated]() + auto_alias = 0x0 + aliases = set[runopt.alias]() + deprecated_aliases = set[runopt.deprecated]() for name in cfg_key: if isinstance(name, runopt.alias): - aliases.append(name) + aliases.add(name) elif isinstance(name, runopt.deprecated): - deprecated_aliases.append(name) + deprecated_aliases.add(name) + elif isinstance(name, int): + auto_alias = auto_alias | name else: if primary_key is not None: raise ValueError( @@ -1228,11 +1268,17 @@ def _get_primary_key_and_aliases( raise ValueError( "Missing cfg_key. Please provide one other than the aliases." ) + if auto_alias != 0x0: + aliases_to_generate_for = aliases | {primary_key} + additional_aliases = self._generate_aliases( + auto_alias, aliases_to_generate_for + ) + aliases.update(additional_aliases) return primary_key, aliases, deprecated_aliases def add( self, - cfg_key: str | list[str], + cfg_key: str | list[str | int], type_: Type[CfgVal], help: str, default: CfgVal = None, diff --git a/torchx/specs/test/api_test.py b/torchx/specs/test/api_test.py index 3cdbadea5..d748579a1 100644 --- a/torchx/specs/test/api_test.py +++ b/torchx/specs/test/api_test.py @@ -648,6 +648,28 @@ def test_runopts_add_with_deprecated_aliases(self) -> None: "Run option `jobPriority` is deprecated, use `job_priority` instead", ) + def test_runopt_auto_aliases(self) -> None: + opts = runopts() + opts.add( + ["job_priority", runopt.AutoAlias.camelCase], + type_=str, + help="run as user", + ) + opts.add( + [ + "model_type_name", + runopt.AutoAlias.camelCase | runopt.AutoAlias.SNAKE_CASE, + ], + type_=str, + help="run as user", + ) + self.assertEqual(2, len(opts._opts)) + self.assertIsNotNone(opts.get("job_priority")) + self.assertIsNotNone(opts.get("jobPriority")) + self.assertIsNotNone(opts.get("model_type_name")) + self.assertIsNotNone(opts.get("modelTypeName")) + self.assertIsNotNone(opts.get("MODEL_TYPE_NAME")) + def get_runopts(self) -> runopts: opts = runopts() opts.add("run_as", type_=str, help="run as user", required=True)