diff --git a/java/test/src/main/java/io/ray/test/NodeLabelSchedulingTest.java b/java/test/src/main/java/io/ray/test/NodeLabelSchedulingTest.java index e321ecd9984f..b25457e87d6e 100644 --- a/java/test/src/main/java/io/ray/test/NodeLabelSchedulingTest.java +++ b/java/test/src/main/java/io/ray/test/NodeLabelSchedulingTest.java @@ -24,7 +24,7 @@ public void testEmptyNodeLabels() { } public void testSetNodeLabels() { - System.setProperty("ray.head-args.0", "--labels=\"gpu_type=A100,azone=azone-1\""); + System.setProperty("ray.head-args.0", "--labels={\"gpu_type\":\"A100\",\"azone\":\"azone-1\"}"); try { Ray.init(); List nodeInfos = Ray.getRuntimeContext().getAllNodeInfo(); diff --git a/python/ray/_private/label_utils.py b/python/ray/_private/label_utils.py deleted file mode 100644 index b4ac0d96fa3a..000000000000 --- a/python/ray/_private/label_utils.py +++ /dev/null @@ -1,100 +0,0 @@ -import re -import yaml -from typing import Dict - -import ray._private.ray_constants as ray_constants - -# Regex patterns used to validate that labels conform to Kubernetes label syntax rules. -# https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set - -# Regex for mandatory name (DNS label) or value -# Examples: -# Valid matches: "a", "label-name", "a-._b", "123", "this_is_a_valid_label" -# Invalid matches: "-abc", "abc-", "my@label", "a" * 64 -LABEL_REGEX = re.compile(r"[a-zA-Z0-9]([a-zA-Z0-9_.-]*[a-zA-Z0-9]){0,62}") - -# Regex for optional prefix (DNS subdomain) -# Examples: -# Valid matches: "abc", "sub.domain.example", "my-label", "123.456.789" -# Invalid matches: "-abc", "prefix_", "sub..domain", sub.$$.example -LABEL_PREFIX_REGEX = rf"^({LABEL_REGEX.pattern}?(\.{LABEL_REGEX.pattern}?)*)$" - - -def parse_node_labels_string(labels_str: str) -> Dict[str, str]: - labels = {} - - # Remove surrounding quotes if they exist - if len(labels_str) > 1 and labels_str.startswith('"') and labels_str.endswith('"'): - labels_str = labels_str[1:-1] - - if labels_str == "": - return labels - - # Labels argument should consist of a string of key=value pairs - # separated by commas. Labels follow Kubernetes label syntax. - label_pairs = labels_str.split(",") - for pair in label_pairs: - # Split each pair by `=` - key_value = pair.split("=") - if len(key_value) != 2: - raise ValueError("Label value is not a key-value pair.") - key = key_value[0].strip() - value = key_value[1].strip() - labels[key] = value - - return labels - - -def parse_node_labels_from_yaml_file(path: str) -> Dict[str, str]: - if path == "": - return {} - with open(path, "r") as file: - # Expects valid YAML content - labels = yaml.safe_load(file) - if not isinstance(labels, dict): - raise ValueError( - "The format after deserialization is not a key-value pair map." - ) - for key, value in labels.items(): - if not isinstance(key, str): - raise ValueError("The key is not string type.") - if not isinstance(value, str): - raise ValueError(f'The value of "{key}" is not string type.') - - return labels - - -def validate_node_labels(labels: Dict[str, str]): - if labels is None: - return - for key in labels.keys(): - if key.startswith(ray_constants.RAY_DEFAULT_LABEL_KEYS_PREFIX): - raise ValueError( - f"Custom label keys `{key}` cannot start with the prefix " - f"`{ray_constants.RAY_DEFAULT_LABEL_KEYS_PREFIX}`. " - f"This is reserved for Ray defined labels." - ) - if "/" in key: - prefix, name = key.rsplit("/") - if len(prefix) > 253 or not re.match(LABEL_PREFIX_REGEX, prefix): - raise ValueError( - f"Invalid label key prefix `{prefix}`. Prefix must be a series of DNS labels " - f"separated by dots (.),not longer than 253 characters in total." - ) - else: - name = key - if len(name) > 63 or not re.match(LABEL_REGEX, name): - raise ValueError( - f"Invalid label key name `{name}`. Name must be 63 chars or less beginning and ending " - f"with an alphanumeric character ([a-z0-9A-Z]) with dashes (-), underscores (_)," - f"dots (.), and alphanumerics between." - ) - value = labels.get(key) - if value is None or value == "": - return - if len(value) > 63 or not re.match(LABEL_REGEX, value): - raise ValueError( - f"Invalid label key value `{value}`. Value must be 63 chars or less beginning and ending " - f"with an alphanumeric character ([a-z0-9A-Z]) with dashes (-), underscores (_)," - f"dots (.), and alphanumerics between." - ) diff --git a/python/ray/_private/parameter.py b/python/ray/_private/parameter.py index 108d9dc919ca..3baba979e62f 100644 --- a/python/ray/_private/parameter.py +++ b/python/ray/_private/parameter.py @@ -3,8 +3,10 @@ from typing import Dict, List, Optional import ray._private.ray_constants as ray_constants -from ray._private.label_utils import validate_node_labels -from ray._private.utils import check_ray_client_dependencies_installed +from ray._private.utils import ( + validate_node_labels, + check_ray_client_dependencies_installed, +) logger = logging.getLogger(__name__) diff --git a/python/ray/_private/utils.py b/python/ray/_private/utils.py index 169d44baa4db..fb6b9828e16e 100644 --- a/python/ray/_private/utils.py +++ b/python/ray/_private/utils.py @@ -1803,6 +1803,43 @@ def update_envs(env_vars: Dict[str, str]): os.environ[key] = result +def parse_node_labels_json( + labels_json: str, cli_logger, cf, command_arg="--labels" +) -> Dict[str, str]: + try: + labels = json.loads(labels_json) + if not isinstance(labels, dict): + raise ValueError( + "The format after deserialization is not a key-value pair map" + ) + for key, value in labels.items(): + if not isinstance(key, str): + raise ValueError("The key is not string type.") + if not isinstance(value, str): + raise ValueError(f'The value of the "{key}" is not string type') + except Exception as e: + cli_logger.abort( + "`{}` is not a valid JSON string, detail error:{}" + "Valid values look like this: `{}`", + cf.bold(f"{command_arg}={labels_json}"), + str(e), + cf.bold(f'{command_arg}=\'{{"gpu_type": "A100", "region": "us"}}\''), + ) + return labels + + +def validate_node_labels(labels: Dict[str, str]): + if labels is None: + return + for key in labels.keys(): + if key.startswith(ray_constants.RAY_DEFAULT_LABEL_KEYS_PREFIX): + raise ValueError( + f"Custom label keys `{key}` cannot start with the prefix " + f"`{ray_constants.RAY_DEFAULT_LABEL_KEYS_PREFIX}`. " + f"This is reserved for Ray defined labels." + ) + + def parse_pg_formatted_resources_to_original( pg_formatted_resources: Dict[str, float] ) -> Dict[str, float]: diff --git a/python/ray/scripts/scripts.py b/python/ray/scripts/scripts.py index 397f6e638210..3e33963fc531 100644 --- a/python/ray/scripts/scripts.py +++ b/python/ray/scripts/scripts.py @@ -25,14 +25,11 @@ import ray import ray._private.ray_constants as ray_constants import ray._private.services as services -from ray._private.label_utils import ( - parse_node_labels_from_yaml_file, - parse_node_labels_string, -) from ray._private.utils import ( check_ray_client_dependencies_installed, load_class, parse_resources_json, + parse_node_labels_json, ) from ray._private.internal_api import memory_summary from ray._private.usage import usage_lib @@ -638,19 +635,9 @@ def debug(address: str, verbose: bool): "--labels", required=False, hidden=True, - default="", - type=str, - help="a string list of key-value pairs mapping label name to label value." - "These values take precedence over conflicting keys passed in from --labels-file." - 'Ex: --labels "key1=val1,key2=val2"', -) -@click.option( - "--labels-file", - required=False, - hidden=True, - default="", + default="{}", type=str, - help="a path to a YAML file containing a dictionary mapping of label keys to values.", + help="a JSON serialized dictionary mapping label name to label value.", ) @click.option( "--include-log-monitor", @@ -708,7 +695,6 @@ def start( ray_debugger_external, disable_usage_stats, labels, - labels_file, include_log_monitor, ): """Start Ray processes manually on the local machine.""" @@ -729,34 +715,7 @@ def start( node_ip_address = services.resolve_ip_for_localhost(node_ip_address) resources = parse_resources_json(resources, cli_logger, cf) - - # Compose labels passed in with `--labels` and `--labels-file`. - # The label value from `--labels` will overrwite the value of any duplicate keys. - try: - labels_from_file_dict = parse_node_labels_from_yaml_file(labels_file) - except Exception as e: - cli_logger.abort( - "The file at `{}` is not a valid YAML file, detailed error:{}" - "Valid values look like this: `{}`", - cf.bold(f"--labels-file={labels_file}"), - str(e), - cf.bold("--labels-file='gpu_type: A100\nregion: us'"), - ) - try: - labels_from_string = parse_node_labels_string(labels) - except Exception as e: - cli_logger.abort( - "`{}` is not a valid string of key-value pairs, detail error:{}" - "Valid values look like this: `{}`", - cf.bold(f"--labels={labels}"), - str(e), - cf.bold('--labels="key1=val1,key2=val2"'), - ) - labels_dict = ( - {**labels_from_file_dict, **labels_from_string} - if labels_from_file_dict - else labels_from_string - ) + labels_dict = parse_node_labels_json(labels, cli_logger, cf) if plasma_store_socket_name is not None: warnings.warn( diff --git a/python/ray/tests/BUILD b/python/ray/tests/BUILD index e902e1b59bd8..deb289cfa750 100644 --- a/python/ray/tests/BUILD +++ b/python/ray/tests/BUILD @@ -349,7 +349,6 @@ py_test_module_list( py_test_module_list( size = "medium", files = [ - "test_label_utils.py", "test_minimal_install.py", "test_runtime_env_ray_minimal.py", "test_utils.py", diff --git a/python/ray/tests/test_label_utils.py b/python/ray/tests/test_label_utils.py deleted file mode 100644 index 328a2594602b..000000000000 --- a/python/ray/tests/test_label_utils.py +++ /dev/null @@ -1,121 +0,0 @@ -import pytest -from ray._private.label_utils import ( - parse_node_labels_string, - parse_node_labels_from_yaml_file, - validate_node_labels, -) -import sys -import tempfile - - -def test_parse_node_labels_from_string(): - # Empty label argument passed - labels_string = "" - labels_dict = parse_node_labels_string(labels_string) - assert labels_dict == {} - - # Valid label key with empty value - labels_string = "region=" - labels_dict = parse_node_labels_string(labels_string) - assert labels_dict == {"region": ""} - - # Multiple valid label keys and values - labels_string = "ray.io/accelerator-type=A100,region=us-west4" - labels_dict = parse_node_labels_string(labels_string) - assert labels_dict == {"ray.io/accelerator-type": "A100", "region": "us-west4"} - - # Invalid label - labels_string = "ray.io/accelerator-type=type=A100" - with pytest.raises(ValueError) as e: - parse_node_labels_string(labels_string) - assert "Label value is not a key-value pair" in str(e) - - -def test_parse_node_labels_from_yaml_file(): - # Empty/invalid yaml - with tempfile.NamedTemporaryFile(mode="w+", delete=True) as test_file: - test_file.write("") - test_file.flush() # Ensure data is written - with pytest.raises(ValueError) as e: - parse_node_labels_from_yaml_file(test_file.name) - assert "The format after deserialization is not a key-value pair map" in str(e) - - # With non-existent yaml file - with pytest.raises(FileNotFoundError): - parse_node_labels_from_yaml_file("missing-file.yaml") - - # Valid label key with empty value - with tempfile.NamedTemporaryFile(mode="w+", delete=True) as test_file: - test_file.write('"ray.io/accelerator-type": ""') - test_file.flush() # Ensure data is written - labels_dict = parse_node_labels_from_yaml_file(test_file.name) - assert labels_dict == {"ray.io/accelerator-type": ""} - - # Multiple valid label keys and values - with tempfile.NamedTemporaryFile(mode="w+", delete=True) as test_file: - test_file.write( - '"ray.io/accelerator-type": "A100"\n"region": "us"\n"market-type": "spot"' - ) - test_file.flush() # Ensure data is written - labels_dict = parse_node_labels_from_yaml_file(test_file.name) - assert labels_dict == { - "ray.io/accelerator-type": "A100", - "region": "us", - "market-type": "spot", - } - - # Non-string label key - with tempfile.NamedTemporaryFile(mode="w+", delete=True) as test_file: - test_file.write('{100: "A100"}') - test_file.flush() # Ensure data is written - with pytest.raises(ValueError) as e: - parse_node_labels_from_yaml_file(test_file.name) - assert "The key is not string type." in str(e) - - # Non-string label value - with tempfile.NamedTemporaryFile(mode="w+", delete=True) as test_file: - test_file.write('{"gpu": 100}') - test_file.flush() # Ensure data is written - with pytest.raises(ValueError) as e: - parse_node_labels_from_yaml_file(test_file.name) - assert 'The value of "gpu" is not string type' in str(e) - - -def test_validate_node_labels(): - # Custom label starts with ray.io prefix - labels_dict = {"ray.io/accelerator-type": "A100"} - with pytest.raises(ValueError) as e: - validate_node_labels(labels_dict) - assert "This is reserved for Ray defined labels." in str(e) - - # Invalid key prefix syntax - labels_dict = {"!invalidPrefix/accelerator-type": "A100"} - with pytest.raises(ValueError) as e: - validate_node_labels(labels_dict) - assert "Invalid label key prefix" in str(e) - - # Invalid key name syntax - labels_dict = {"!!accelerator-type?": "A100"} - with pytest.raises(ValueError) as e: - validate_node_labels(labels_dict) - assert "Invalid label key name" in str(e) - - # Invalid key value syntax - labels_dict = {"accelerator-type": "??"} - with pytest.raises(ValueError) as e: - validate_node_labels(labels_dict) - assert "Invalid label key value" in str(e) - - # Valid node label - labels_dict = {"accelerator-type": "A100"} - validate_node_labels(labels_dict) - - -if __name__ == "__main__": - import os - - # Skip test_basic_2_client_mode for now- the test suite is breaking. - if os.environ.get("PARALLEL_CI"): - sys.exit(pytest.main(["-n", "auto", "--boxed", "-vs", __file__])) - else: - sys.exit(pytest.main(["-sv", __file__])) diff --git a/python/ray/tests/test_node_label_scheduling_strategy.py b/python/ray/tests/test_node_label_scheduling_strategy.py index 8ed19dfba377..83195c6922e9 100644 --- a/python/ray/tests/test_node_label_scheduling_strategy.py +++ b/python/ray/tests/test_node_label_scheduling_strategy.py @@ -31,7 +31,7 @@ def get_node_id(): @pytest.mark.parametrize( "call_ray_start", - ["ray start --head --labels gpu_type=A100,region=us"], + ['ray start --head --labels={"gpu_type":"A100","region":"us"}'], indirect=True, ) def test_node_label_scheduling_basic(call_ray_start): diff --git a/python/ray/tests/test_node_labels.py b/python/ray/tests/test_node_labels.py index 850263d3b532..0854286d3bda 100644 --- a/python/ray/tests/test_node_labels.py +++ b/python/ray/tests/test_node_labels.py @@ -19,7 +19,7 @@ def add_default_labels(node_info, labels): @pytest.mark.parametrize( "call_ray_start", - ['ray start --head --labels "gpu_type=A100,region=us"'], + ['ray start --head --labels={"gpu_type":"A100","region":"us"}'], indirect=True, ) def test_ray_start_set_node_labels(call_ray_start): @@ -33,7 +33,7 @@ def test_ray_start_set_node_labels(call_ray_start): @pytest.mark.parametrize( "call_ray_start", [ - 'ray start --head --labels ""', + "ray start --head --labels={}", ], indirect=True, ) @@ -80,17 +80,19 @@ def test_ray_init_set_node_labels_value_error(ray_start_cluster): def test_ray_start_set_node_labels_value_error(): - out = check_cmd_stderr(["ray", "start", "--head", "--labels", "xxx"]) - assert "is not a valid string of key-value pairs, detail error" in out + out = check_cmd_stderr(["ray", "start", "--head", "--labels=xxx"]) + assert "is not a valid JSON string, detail error" in out - out = check_cmd_stderr(["ray", "start", "--head", "--labels", "!gpu_type=1"]) - assert "Invalid label key name `!gpu_type`" in out + out = check_cmd_stderr(["ray", "start", "--head", '--labels={"gpu_type":1}']) + assert 'The value of the "gpu_type" is not string type' in out - out = check_cmd_stderr(["ray", "start", "--head", "--labels", "ray.io/node_id=111"]) + out = check_cmd_stderr( + ["ray", "start", "--head", '--labels={"ray.io/node_id":"111"}'] + ) assert "cannot start with the prefix `ray.io/`" in out out = check_cmd_stderr( - ["ray", "start", "--head", "--labels", "ray.io/other_key=111"] + ["ray", "start", "--head", '--labels={"ray.io/other_key":"111"}'] ) assert "cannot start with the prefix `ray.io/`" in out diff --git a/python/ray/tests/test_utils.py b/python/ray/tests/test_utils.py index fe81b3ac7ed3..b5bb5d34f86a 100644 --- a/python/ray/tests/test_utils.py +++ b/python/ray/tests/test_utils.py @@ -1,6 +1,6 @@ # coding: utf-8 """ -Unit/Integration Testing for python/ray/_private/utils.py +Unit/Integration Testing for python/_private/utils.py This currently expects to work for minimal installs. """