diff --git a/python/ray/_common/network_utils.py b/python/ray/_common/network_utils.py index 593407b5c377..cf708b15448d 100644 --- a/python/ray/_common/network_utils.py +++ b/python/ray/_common/network_utils.py @@ -1,89 +1,16 @@ import socket from contextlib import closing -from functools import lru_cache -from typing import Optional, Tuple, Union -from ray._raylet import ( - build_address as _build_address, - is_ipv6 as _is_ipv6, - node_ip_address_from_perspective as _node_ip_address_from_perspective, - parse_address as _parse_address, +from ray._raylet import ( # noqa: F401 + build_address, + get_all_interfaces_ip, + get_localhost_ip, + is_ipv6, + node_ip_address_from_perspective, + parse_address, ) -def parse_address(address: str) -> Optional[Tuple[str, str]]: - """Parse a network address string into host and port. - - Args: - address: The address string to parse (e.g., "localhost:8000", "[::1]:8000"). - - Returns: - Tuple with (host, port) if port found, None if no colon separator. - """ - return _parse_address(address) - - -def build_address(host: str, port: Union[int, str]) -> str: - """Build a network address string from host and port. - - Args: - host: The hostname or IP address. - port: The port number (int or string). - - Returns: - Formatted address string (e.g., "localhost:8000" or "[::1]:8000"). - """ - return _build_address(host, port) - - -def node_ip_address_from_perspective(address: Optional[str] = None) -> str: - """IP address by which the local node can be reached *from* the `address`. - - If no address is given, defaults to public DNS servers for detection. - - Args: - address: The IP address and port of any known live service on the - network you care about. - - Returns: - The IP address by which the local node can be reached from the address. - """ - return _node_ip_address_from_perspective(address) - - -def is_ipv6(host: str) -> bool: - """Check if a host is resolved to IPv6. - - Args: - host: The IP or domain name to check (must be without port). - - Returns: - True if the host is resolved to IPv6, False if IPv4. - """ - return _is_ipv6(host) - - -@lru_cache(maxsize=1) -def get_localhost_ip() -> str: - """Get localhost loopback ip with IPv4/IPv6 support. - - Returns: - The localhost loopback IP. - """ - # Try IPv4 first, then IPv6 localhost resolution - for family in [socket.AF_INET, socket.AF_INET6]: - try: - dns_result = socket.getaddrinfo( - "localhost", None, family, socket.SOCK_STREAM - ) - return dns_result[0][4][0] - except Exception: - continue - - # Final fallback to IPv4 loopback - return "127.0.0.1" - - def is_localhost(host: str) -> bool: """Check if the given host string represents a localhost address. diff --git a/python/ray/_private/parameter.py b/python/ray/_private/parameter.py index 8b1b23ccaecc..3347ddd7e528 100644 --- a/python/ray/_private/parameter.py +++ b/python/ray/_private/parameter.py @@ -4,6 +4,7 @@ from typing import Dict, List, Optional import ray._private.ray_constants as ray_constants +from ray._common.network_utils import get_localhost_ip from ray._private.resource_isolation_config import ResourceIsolationConfig from ray._private.utils import get_ray_client_dependency_error @@ -75,10 +76,9 @@ class RayParams: UI, which displays the status of the Ray cluster. If this value is None, then the UI will be started if the relevant dependencies are present. - dashboard_host: The host to bind the web UI server to. Can either be - localhost (127.0.0.1) or 0.0.0.0 (available from all interfaces). - By default, this is set to localhost to prevent access from - external machines. + dashboard_host: The host to bind the dashboard server to. Use localhost + (127.0.0.1/::1) for local access, the node IP for remote access, or + 0.0.0.0/:: for all interfaces (not recommended). Defaults to localhost. dashboard_port: The port to bind the dashboard server to. Defaults to 8265. dashboard_agent_listen_port: The port for dashboard agents to listen on @@ -154,7 +154,7 @@ def __init__( setup_worker_path: Optional[str] = None, huge_pages: Optional[bool] = False, include_dashboard: Optional[bool] = None, - dashboard_host: Optional[str] = ray_constants.DEFAULT_DASHBOARD_IP, + dashboard_host: Optional[str] = get_localhost_ip(), dashboard_port: Optional[bool] = ray_constants.DEFAULT_DASHBOARD_PORT, dashboard_agent_listen_port: Optional[ int diff --git a/python/ray/_private/ray_constants.py b/python/ray/_private/ray_constants.py index 6a70a6396388..7c3748862c20 100644 --- a/python/ray/_private/ray_constants.py +++ b/python/ray/_private/ray_constants.py @@ -173,7 +173,6 @@ def env_set_by_user(key): # Timeout waiting for the dashboard to come alive during node startup. RAY_DASHBOARD_STARTUP_TIMEOUT_S = env_integer("RAY_DASHBOARD_STARTUP_TIMEOUT_S", 60) -DEFAULT_DASHBOARD_IP = "127.0.0.1" DEFAULT_DASHBOARD_PORT = 8265 DASHBOARD_ADDRESS = "dashboard" DASHBOARD_CLIENT_MAX_SIZE = 100 * 1024**2 diff --git a/python/ray/_private/worker.py b/python/ray/_private/worker.py index e1d5ca281407..e10f80d2d2a6 100644 --- a/python/ray/_private/worker.py +++ b/python/ray/_private/worker.py @@ -38,6 +38,8 @@ ) from urllib.parse import urlparse +from ray._common.network_utils import get_localhost_ip + if TYPE_CHECKING: import torch @@ -1420,7 +1422,7 @@ def init( local_mode: bool = False, ignore_reinit_error: bool = False, include_dashboard: Optional[bool] = None, - dashboard_host: str = ray_constants.DEFAULT_DASHBOARD_IP, + dashboard_host: str = get_localhost_ip(), dashboard_port: Optional[int] = None, job_config: "ray.job_config.JobConfig" = None, configure_logging: bool = True, @@ -1513,10 +1515,9 @@ def init( Ray dashboard, which displays the status of the Ray cluster. If this argument is None, then the UI will be started if the relevant dependencies are present. - dashboard_host: The host to bind the dashboard server to. Can either be - localhost (127.0.0.1) or 0.0.0.0 (available from all interfaces). - By default, this is set to localhost to prevent access from - external machines. + dashboard_host: The host to bind the dashboard server to. Use localhost + (127.0.0.1/::1) for local access, the node IP for remote access, or + 0.0.0.0/:: for all interfaces (not recommended). Defaults to localhost. dashboard_port(int, None): The port to bind the dashboard server to. Defaults to 8265 and Ray will automatically find a free port if 8265 is not available. diff --git a/python/ray/autoscaler/_private/monitor.py b/python/ray/autoscaler/_private/monitor.py index abea72ce94e9..fb53ac321ff2 100644 --- a/python/ray/autoscaler/_private/monitor.py +++ b/python/ray/autoscaler/_private/monitor.py @@ -14,7 +14,12 @@ import ray import ray._private.ray_constants as ray_constants -from ray._common.network_utils import build_address, parse_address +from ray._common.network_utils import ( + build_address, + get_localhost_ip, + is_localhost, + parse_address, +) from ray._common.ray_constants import ( LOGGING_ROTATE_BACKUP_COUNT, LOGGING_ROTATE_BYTES, @@ -195,7 +200,9 @@ def __init__( AUTOSCALER_METRIC_PORT ) ) - kwargs = {"addr": "127.0.0.1"} if head_node_ip == "127.0.0.1" else {} + kwargs = ( + {"addr": get_localhost_ip()} if is_localhost(head_node_ip) else {} + ) prometheus_client.start_http_server( port=AUTOSCALER_METRIC_PORT, registry=self.prom_metrics.registry, diff --git a/python/ray/autoscaler/v2/monitor.py b/python/ray/autoscaler/v2/monitor.py index 34e31e7ac649..d87c81c44c3e 100644 --- a/python/ray/autoscaler/v2/monitor.py +++ b/python/ray/autoscaler/v2/monitor.py @@ -13,7 +13,12 @@ import ray import ray._private.ray_constants as ray_constants -from ray._common.network_utils import build_address, parse_address +from ray._common.network_utils import ( + build_address, + get_localhost_ip, + is_localhost, + parse_address, +) from ray._common.ray_constants import ( LOGGING_ROTATE_BACKUP_COUNT, LOGGING_ROTATE_BYTES, @@ -114,7 +119,9 @@ def __init__( AUTOSCALER_METRIC_PORT ) ) - kwargs = {"addr": "127.0.0.1"} if head_node_ip == "127.0.0.1" else {} + kwargs = ( + {"addr": get_localhost_ip()} if is_localhost(head_node_ip) else {} + ) prometheus_client.start_http_server( port=AUTOSCALER_METRIC_PORT, registry=prom_metrics.registry, diff --git a/python/ray/dashboard/head.py b/python/ray/dashboard/head.py index 94a8bb3cf380..18a07d1a650e 100644 --- a/python/ray/dashboard/head.py +++ b/python/ray/dashboard/head.py @@ -9,7 +9,7 @@ import ray.dashboard.consts as dashboard_consts import ray.dashboard.utils as dashboard_utils import ray.experimental.internal_kv as internal_kv -from ray._common.network_utils import build_address +from ray._common.network_utils import build_address, get_localhost_ip, is_localhost from ray._common.usage.usage_lib import TagKey, record_extra_usage_tag from ray._private import ray_constants from ray._private.async_utils import enable_monitor_loop_lag @@ -96,7 +96,7 @@ def __init__( self.serve_frontend = False # Public attributes are accessible for all head modules. # Walkaround for issue: https://github.com/ray-project/ray/issues/7084 - self.http_host = "127.0.0.1" if http_host == "localhost" else http_host + self.http_host = get_localhost_ip() if http_host == "localhost" else http_host self.http_port = http_port self.http_port_retries = http_port_retries self._modules_to_load = modules_to_load @@ -313,7 +313,7 @@ async def _setup_metrics(self, gcs_client): DASHBOARD_METRIC_PORT ) ) - kwargs = {"addr": "127.0.0.1"} if self.ip == "127.0.0.1" else {} + kwargs = {"addr": get_localhost_ip()} if is_localhost(self.ip) else {} prometheus_client.start_http_server( port=DASHBOARD_METRIC_PORT, registry=metrics.registry, @@ -444,12 +444,8 @@ def on_new_lag(lag_s): logger.info("http server disabled.") # We need to expose dashboard's node's ip for other worker nodes - # if it's listening to all interfaces. - dashboard_http_host = ( - self.ip - if self.http_host != ray_constants.DEFAULT_DASHBOARD_IP - else http_host - ) + # if it's not localhost. + dashboard_http_host = self.ip if not is_localhost(self.http_host) else http_host # This synchronous code inside an async context is not great. # It is however acceptable, because this only gets run once # during initialization and therefore cannot block the event loop. diff --git a/python/ray/dashboard/http_server_agent.py b/python/ray/dashboard/http_server_agent.py index 7aa58ae0324b..9ac63b32625f 100644 --- a/python/ray/dashboard/http_server_agent.py +++ b/python/ray/dashboard/http_server_agent.py @@ -6,7 +6,7 @@ from packaging.version import Version import ray.dashboard.optional_utils as dashboard_optional_utils -from ray._common.network_utils import build_address, is_localhost +from ray._common.network_utils import build_address, get_localhost_ip, is_localhost from ray._common.utils import get_or_create_event_loop from ray._private.authentication.http_token_authentication import ( get_token_auth_middleware, @@ -60,7 +60,7 @@ async def _start_site_with_retry( if not is_localhost(self.ip): local_site = aiohttp.web.TCPSite( self.runner, - "127.0.0.1", + get_localhost_ip(), self.listen_port, ) await local_site.start() diff --git a/python/ray/dashboard/modules/job/tests/test_job_manager.py b/python/ray/dashboard/modules/job/tests/test_job_manager.py index a7ce5fe20782..18f2dc1344cf 100644 --- a/python/ray/dashboard/modules/job/tests/test_job_manager.py +++ b/python/ray/dashboard/modules/job/tests/test_job_manager.py @@ -12,7 +12,7 @@ import pytest import ray -from ray._common.network_utils import build_address +from ray._common.network_utils import build_address, get_localhost_ip from ray._common.test_utils import ( FakeTimer, SignalActor, @@ -1370,7 +1370,7 @@ async def test_bootstrap_address(job_manager, monkeypatch): cluster might be started with http://ip:{dashboard_port} from previous runs. """ - ip = ray._private.ray_constants.DEFAULT_DASHBOARD_IP + ip = get_localhost_ip() port = ray._private.ray_constants.DEFAULT_DASHBOARD_PORT monkeypatch.setenv("RAY_ADDRESS", f"http://{build_address(ip, port)}") diff --git a/python/ray/dashboard/modules/reporter/reporter_agent.py b/python/ray/dashboard/modules/reporter/reporter_agent.py index df47152336b9..ae8bc7b96b67 100644 --- a/python/ray/dashboard/modules/reporter/reporter_agent.py +++ b/python/ray/dashboard/modules/reporter/reporter_agent.py @@ -25,6 +25,7 @@ import ray._private.prometheus_exporter as prometheus_exporter import ray.dashboard.modules.reporter.reporter_consts as reporter_consts import ray.dashboard.utils as dashboard_utils +from ray._common.network_utils import get_localhost_ip, is_localhost from ray._common.utils import ( get_or_create_event_loop, ) @@ -452,7 +453,7 @@ def __init__(self, dashboard_agent, raylet_client=None): prometheus_exporter.Options( namespace="ray", port=dashboard_agent.metrics_export_port, - address="127.0.0.1" if self._ip == "127.0.0.1" else "", + address=get_localhost_ip() if is_localhost(self._ip) else "", ) ) dashboard_agent.metrics_export_port = stats_exporter.port diff --git a/python/ray/dashboard/modules/reporter/tests/test_reporter.py b/python/ray/dashboard/modules/reporter/tests/test_reporter.py index 6a6281aab1fe..18dda866ec1f 100644 --- a/python/ray/dashboard/modules/reporter/tests/test_reporter.py +++ b/python/ray/dashboard/modules/reporter/tests/test_reporter.py @@ -1202,16 +1202,13 @@ def get_pid(self): a = MyActor.remote() worker_pid = ray.get(a.get_pid.remote()) + node = ray._private.worker.global_worker.node dashboard_agent = MagicMock() - dashboard_agent.gcs_address = build_address("127.0.0.1", 6379) - dashboard_agent.ip = "127.0.0.1" - dashboard_agent.node_manager_port = ( - ray._private.worker.global_worker.node.node_manager_port - ) - dashboard_agent.session_dir = ( - ray._private.worker.global_worker.node.get_session_dir_path() - ) - dashboard_agent.node_id = ray._private.worker.global_worker.node.unique_id + dashboard_agent.gcs_address = build_address(node.node_ip_address, 6379) + dashboard_agent.ip = node.node_ip_address + dashboard_agent.node_manager_port = node.node_manager_port + dashboard_agent.session_dir = node.get_session_dir_path() + dashboard_agent.node_id = ray.NodeID.from_random().hex() agent = ReporterAgent(dashboard_agent) pids = await agent._async_get_worker_pids_from_raylet() assert len(pids) == 2 @@ -1232,14 +1229,13 @@ def get_pid(self): async def test_reporter_dashboard_and_runtime_env_agent( ray_start_with_dashboard, tmp_path ): + node = ray._private.worker.global_worker.node dashboard_agent = MagicMock() - dashboard_agent.gcs_address = build_address("127.0.0.1", 6379) + dashboard_agent.gcs_address = build_address(node.node_ip_address, 6379) dashboard_agent.session_dir = str(tmp_path) dashboard_agent.node_id = ray.NodeID.from_random().hex() - dashboard_agent.ip = "127.0.0.1" - dashboard_agent.node_manager_port = ( - ray._private.worker.global_worker.node.node_manager_port - ) + dashboard_agent.ip = node.node_ip_address + dashboard_agent.node_manager_port = node.node_manager_port agent = ReporterAgent(dashboard_agent) agent_pids = await agent._async_get_agent_pids_from_raylet() assert len(agent_pids) == 2 diff --git a/python/ray/includes/network_util.pxd b/python/ray/includes/network_util.pxd index 569734cadc41..aaf763bfc06e 100644 --- a/python/ray/includes/network_util.pxd +++ b/python/ray/includes/network_util.pxd @@ -10,3 +10,5 @@ cdef extern from "ray/util/network_util.h" namespace "ray": optional[array_string_2] ParseAddress(const string &address) string GetNodeIpAddressFromPerspective(const optional[string] &address) bool IsIPv6(const string &host) + string GetLocalhostIP() + string GetAllInterfacesIP() diff --git a/python/ray/includes/network_util.pxi b/python/ray/includes/network_util.pxi index 76302717621b..6040f165d30d 100644 --- a/python/ray/includes/network_util.pxi +++ b/python/ray/includes/network_util.pxi @@ -3,14 +3,23 @@ from ray.includes.network_util cimport ( ParseAddress, GetNodeIpAddressFromPerspective, IsIPv6, + GetLocalhostIP, + GetAllInterfacesIP, array_string_2, optional, ) from libcpp.string cimport string from typing import Optional, Tuple, Union -import socket def parse_address(address: str) -> Optional[Tuple[str, str]]: + """Parse a network address string into host and port. + + Args: + address: The address string to parse (e.g., "localhost:8000", "[::1]:8000"). + + Returns: + Tuple with (host, port) if port found, None if no colon separator. + """ cdef optional[array_string_2] res = ParseAddress(address.encode('utf-8')) if not res.has_value(): return None @@ -20,6 +29,15 @@ def parse_address(address: str) -> Optional[Tuple[str, str]]: def build_address(host: str, port: Union[int, str]) -> str: + """Build a network address string from host and port. + + Args: + host: The hostname or IP address. + port: The port number (int or string). + + Returns: + Formatted address string (e.g., "localhost:8000" or "[::1]:8000"). + """ cdef string host_c = host.encode('utf-8') cdef string result cdef string port_c @@ -34,6 +52,17 @@ def build_address(host: str, port: Union[int, str]) -> str: def node_ip_address_from_perspective(address=None) -> str: + """IP address by which the local node can be reached from the address. + + If no address is given, defaults to public DNS servers for detection. + + Args: + address: The IP address and port of any known live service on the + network you care about. + + Returns: + The IP address by which the local node can be reached from the address. + """ cdef string node_ip cdef optional[string] address_c cdef string address_str @@ -47,5 +76,33 @@ def node_ip_address_from_perspective(address=None) -> str: def is_ipv6(host: str) -> bool: + """Check if a host is resolved to IPv6. + + Args: + host: The IP or domain name to check (must be without port). + + Returns: + True if the host is resolved to IPv6, False if IPv4. + """ cdef string host_c = host.encode('utf-8') return IsIPv6(host_c) + + +def get_localhost_ip() -> str: + """Get localhost loopback IP with IPv4/IPv6 support. + + Returns: + "127.0.0.1" for IPv4 or "::1" for IPv6. + """ + cdef string result = GetLocalhostIP() + return result.decode('utf-8') + + +def get_all_interfaces_ip() -> str: + """Get the IP address to bind to all network interfaces. + + Returns "0.0.0.0" for IPv4 or "::" for IPv6, depending on the system's + localhost resolution. + """ + cdef string result = GetAllInterfacesIP() + return result.decode('utf-8') diff --git a/python/ray/scripts/scripts.py b/python/ray/scripts/scripts.py index 11db478b9a21..77a7e69d1e0c 100644 --- a/python/ray/scripts/scripts.py +++ b/python/ray/scripts/scripts.py @@ -21,7 +21,7 @@ import ray._common.usage.usage_constants as usage_constant import ray._private.ray_constants as ray_constants import ray._private.services as services -from ray._common.network_utils import build_address, parse_address +from ray._common.network_utils import build_address, get_localhost_ip, parse_address from ray._common.usage import usage_lib from ray._common.utils import load_class from ray._private.authentication.authentication_token_setup import ( @@ -492,10 +492,10 @@ def debug(address: str, verbose: bool): @click.option( "--dashboard-host", required=False, - default=ray_constants.DEFAULT_DASHBOARD_IP, - help="the host to bind the dashboard server to, either localhost " - "(127.0.0.1) or 0.0.0.0 (available from all interfaces). By default, this " - "is 127.0.0.1", + default=get_localhost_ip(), + help="the host to bind the dashboard server to. Use localhost " + "(127.0.0.1/::1) for local access, the node IP for remote access, or " + "0.0.0.0/:: for all interfaces (not recommended). Defaults to localhost.", ) @click.option( "--dashboard-port", diff --git a/python/ray/scripts/symmetric_run.py b/python/ray/scripts/symmetric_run.py index f1bf2a5987e9..83a322ead607 100644 --- a/python/ray/scripts/symmetric_run.py +++ b/python/ray/scripts/symmetric_run.py @@ -9,6 +9,7 @@ import click import ray +from ray._common.network_utils import is_localhost from ray._private.ray_constants import env_integer from ray._raylet import GcsClient from ray.exceptions import RpcError @@ -194,7 +195,7 @@ def symmetric_run(address, min_nodes, ray_args_and_entrypoint): if min_nodes > 1: # Ban localhost ips if we are not running on a single node # to avoid starting N head nodes - my_ips = [ip for ip in my_ips if ip != "127.0.0.1" and ip != "::1"] + my_ips = [ip for ip in my_ips if not is_localhost(ip)] is_head = resolved_gcs_host in my_ips diff --git a/python/ray/serve/_private/api.py b/python/ray/serve/_private/api.py index 60b097d4c001..487107c2dcbb 100644 --- a/python/ray/serve/_private/api.py +++ b/python/ray/serve/_private/api.py @@ -174,8 +174,8 @@ def serve_start( with fields: - host(str, None): Host for HTTP servers to listen on. Defaults to - "127.0.0.1". To expose Serve publicly, you probably want to set - this to "0.0.0.0". + localhost. To expose Serve publicly, you probably want to set + this to "0.0.0.0" for IPv4 or "::" for IPv6. - port(int): Port for HTTP server. Defaults to 8000. - root_path(str): Root path to mount the serve application (for example, "/serve"). All deployment routes will be prefixed diff --git a/python/ray/serve/_private/constants.py b/python/ray/serve/_private/constants.py index 3b10f4dacdeb..5f4d0e583742 100644 --- a/python/ray/serve/_private/constants.py +++ b/python/ray/serve/_private/constants.py @@ -26,9 +26,6 @@ #: Ray namespace used for all Serve actors SERVE_NAMESPACE = "serve" -#: HTTP Host -DEFAULT_HTTP_HOST = "127.0.0.1" - #: HTTP Port DEFAULT_HTTP_PORT = 8000 diff --git a/python/ray/serve/config.py b/python/ray/serve/config.py index ae6dc19e58c7..fbf561ea44cf 100644 --- a/python/ray/serve/config.py +++ b/python/ray/serve/config.py @@ -6,6 +6,7 @@ from typing import Any, Callable, Dict, List, Optional, Union from ray import cloudpickle +from ray._common.network_utils import get_localhost_ip from ray._common.pydantic_compat import ( BaseModel, Field, @@ -23,7 +24,6 @@ from ray.serve._private.constants import ( DEFAULT_AUTOSCALING_POLICY_NAME, DEFAULT_GRPC_PORT, - DEFAULT_HTTP_HOST, DEFAULT_HTTP_PORT, DEFAULT_REQUEST_ROUTER_PATH, DEFAULT_REQUEST_ROUTING_STATS_PERIOD_S, @@ -637,8 +637,8 @@ class HTTPOptions(BaseModel): """HTTP options for the proxies. Supported fields: - host: Host that the proxies listens for HTTP on. Defaults to - "127.0.0.1". To expose Serve publicly, you probably want to set - this to "0.0.0.0". + localhost. To expose Serve publicly, you probably want to set + this to "0.0.0.0" for IPv4 or "::" for IPv6. - port: Port that the proxies listen for HTTP on. Defaults to 8000. - root_path: An optional root path to mount the serve application (for example, "/prefix"). All deployment routes are prefixed @@ -667,7 +667,7 @@ class HTTPOptions(BaseModel): internal Serve HTTP proxy actor. """ - host: Optional[str] = DEFAULT_HTTP_HOST + host: Optional[str] = get_localhost_ip() port: int = DEFAULT_HTTP_PORT middlewares: List[Any] = [] location: Optional[DeploymentMode] = DeploymentMode.HeadOnly diff --git a/python/ray/serve/schema.py b/python/ray/serve/schema.py index c5cd17defe50..aa4d3eb069e4 100644 --- a/python/ray/serve/schema.py +++ b/python/ray/serve/schema.py @@ -6,6 +6,7 @@ from typing import Any, Callable, Dict, List, Optional, Set, Union from zlib import crc32 +from ray._common.network_utils import get_all_interfaces_ip from ray._common.pydantic_compat import ( BaseModel, Extra, @@ -548,12 +549,13 @@ class ServeApplicationSchema(BaseModel): ), ) host: str = Field( - default="0.0.0.0", + default_factory=get_all_interfaces_ip, description=( "Host for HTTP servers to listen on. Defaults to " - '"0.0.0.0", which exposes Serve publicly. Cannot be updated once ' - "your Serve application has started running. The Serve application " - "must be shut down and restarted with the new host instead." + "all interfaces (0.0.0.0 for IPv4, :: for IPv6), which exposes " + "Serve publicly. Cannot be updated once your Serve application " + "has started running. The Serve application must be shut down and " + "restarted with the new host instead." ), ) port: int = Field( @@ -728,12 +730,12 @@ class HTTPOptionsSchema(BaseModel): """ host: str = Field( - default="0.0.0.0", + default_factory=get_all_interfaces_ip, description=( "Host for HTTP servers to listen on. Defaults to " - '"0.0.0.0", which exposes Serve publicly. Cannot be updated once ' - "Serve has started running. Serve must be shut down and restarted " - "with the new host instead." + "all interfaces (0.0.0.0 for IPv4, :: for IPv6), which exposes " + "Serve publicly. Cannot be updated once Serve has started running. " + "Serve must be shut down and restarted with the new host instead." ), ) port: int = Field( diff --git a/python/ray/serve/scripts.py b/python/ray/serve/scripts.py index 4bd3b0ba5f7d..738ffd728aaf 100644 --- a/python/ray/serve/scripts.py +++ b/python/ray/serve/scripts.py @@ -15,6 +15,7 @@ import ray from ray import serve +from ray._common.network_utils import get_all_interfaces_ip, get_localhost_ip from ray._common.utils import import_attr from ray.autoscaler._private.cli_logger import cli_logger from ray.dashboard.modules.dashboard_sdk import parse_runtime_env_args @@ -23,7 +24,6 @@ from ray.serve._private.build_app import BuiltApplication, build_app from ray.serve._private.constants import ( DEFAULT_GRPC_PORT, - DEFAULT_HTTP_HOST, DEFAULT_HTTP_PORT, SERVE_DEFAULT_APP_NAME, SERVE_NAMESPACE, @@ -156,10 +156,10 @@ def cli(): ) @click.option( "--http-host", - default=DEFAULT_HTTP_HOST, + default=get_localhost_ip(), required=False, type=str, - help="Host for HTTP proxies to listen on. " f"Defaults to {DEFAULT_HTTP_HOST}.", + help="Host for HTTP proxies to listen on. Defaults to localhost(127.0.0.1/::1).", ) @click.option( "--http-port", @@ -861,7 +861,7 @@ def build_app_config(import_path: str, name: str = None): deploy_config = { "proxy_location": "EveryNode", "http_options": { - "host": "0.0.0.0", + "host": get_all_interfaces_ip(), "port": 8000, }, "grpc_options": { diff --git a/python/ray/tests/test_autoscaler.py b/python/ray/tests/test_autoscaler.py index 681137775d04..f632a35d2996 100644 --- a/python/ray/tests/test_autoscaler.py +++ b/python/ray/tests/test_autoscaler.py @@ -3515,7 +3515,7 @@ def __init__(self, *args, **kwargs): _internal_kv_initialized=Mock(return_value=False), ): monitor = Monitor( - address="localhost:12345", + address=ray._private.worker.global_worker.gcs_client.address, autoscaling_config="", log_dir=self.tmpdir, ) diff --git a/python/ray/util/client/__init__.py b/python/ray/util/client/__init__.py index 97f4bf2802bc..c74500631cbb 100644 --- a/python/ray/util/client/__init__.py +++ b/python/ray/util/client/__init__.py @@ -4,6 +4,7 @@ from typing import Any, Dict, List, Optional, Tuple import ray._private.ray_constants as ray_constants +from ray._common.network_utils import build_address, get_localhost_ip from ray._private.client_mode_hook import ( _explicitly_disable_client_mode, _explicitly_enable_client_mode, @@ -169,10 +170,10 @@ def init(self, *args, **kwargs): import ray.util.client.server.server as ray_client_server server_handle, address_info = ray_client_server.init_and_serve( - "127.0.0.1", 50051, *args, **kwargs + get_localhost_ip(), 50051, *args, **kwargs ) self._server = server_handle.grpc_server - self.connect("127.0.0.1:50051") + self.connect(build_address(get_localhost_ip(), 50051)) self._connected_with_init = True return address_info diff --git a/python/ray/util/client/ray_client_helpers.py b/python/ray/util/client/ray_client_helpers.py index 1554bd5e1c23..b7eaf8421bf6 100644 --- a/python/ray/util/client/ray_client_helpers.py +++ b/python/ray/util/client/ray_client_helpers.py @@ -4,6 +4,7 @@ import ray as real_ray import ray.util.client.server.server as ray_client_server +from ray._common.network_utils import build_address, get_localhost_ip from ray._private.client_mode_hook import disable_client_hook from ray.job_config import JobConfig from ray.util.client import ray @@ -43,9 +44,9 @@ def ray_start_client_server_pair(metadata=None, ray_connect_handler=None, **kwar with disable_client_hook(): assert not ray.is_initialized() server = ray_client_server.serve( - "127.0.0.1", 50051, ray_connect_handler=ray_connect_handler + get_localhost_ip(), 50051, ray_connect_handler=ray_connect_handler ) - ray.connect("127.0.0.1:50051", metadata=metadata, **kwargs) + ray.connect(build_address(get_localhost_ip(), 50051), metadata=metadata, **kwargs) try: yield ray, server finally: @@ -71,9 +72,9 @@ def ray_connect_handler(job_config=None, **ray_init_kwargs): real_ray.init(address=address) server = ray_client_server.serve( - "127.0.0.1", 50051, ray_connect_handler=ray_connect_handler + get_localhost_ip(), 50051, ray_connect_handler=ray_connect_handler ) - ray.connect("127.0.0.1:50051") + ray.connect(build_address(get_localhost_ip(), 50051)) try: yield ray, server finally: diff --git a/python/ray/util/client/server/proxier.py b/python/ray/util/client/server/proxier.py index 7b1ba13ed81e..7ddb8ad1e44c 100644 --- a/python/ray/util/client/server/proxier.py +++ b/python/ray/util/client/server/proxier.py @@ -21,6 +21,7 @@ import ray.core.generated.runtime_env_agent_pb2 as runtime_env_agent_pb2 from ray._common.network_utils import ( build_address, + get_localhost_ip, is_ipv6, is_localhost, ) @@ -229,7 +230,7 @@ def create_specific_server(self, client_id: str) -> SpecificServer: self.servers.get(client_id) is None ), f"Server already created for Client: {client_id}" - host = "127.0.0.1" + host = get_localhost_ip() port = self._get_unused_port( socket.AF_INET6 if is_ipv6(host) else socket.AF_INET ) @@ -368,7 +369,7 @@ def start_specific_server(self, client_id: str, job_config: JobConfig) -> bool: proc = start_ray_client_server( self.address, - "127.0.0.1", + get_localhost_ip(), specific_server.port, stdout_file=output, stderr_file=error, diff --git a/python/ray/util/client/server/server.py b/python/ray/util/client/server/server.py index d03f35a2afb4..6e6d6aa6a542 100644 --- a/python/ray/util/client/server/server.py +++ b/python/ray/util/client/server/server.py @@ -19,7 +19,7 @@ import ray.core.generated.ray_client_pb2 as ray_client_pb2 import ray.core.generated.ray_client_pb2_grpc as ray_client_pb2_grpc from ray import cloudpickle -from ray._common.network_utils import build_address, is_localhost +from ray._common.network_utils import build_address, get_localhost_ip, is_localhost from ray._private import ray_constants from ray._private.client_mode_hook import disable_client_hook from ray._private.ray_constants import env_integer @@ -787,8 +787,8 @@ def default_connect_handler( ray_client_pb2_grpc.add_RayletDataStreamerServicer_to_server(data_servicer, server) ray_client_pb2_grpc.add_RayletLogStreamerServicer_to_server(logs_servicer, server) if not is_localhost(host): - add_port_to_grpc_server(server, f"127.0.0.1:{port}") - add_port_to_grpc_server(server, f"{host}:{port}") + add_port_to_grpc_server(server, build_address(get_localhost_ip(), port)) + add_port_to_grpc_server(server, build_address(host, port)) current_handle = ClientServerHandle( task_servicer=task_servicer, data_servicer=data_servicer, @@ -855,7 +855,10 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument( - "--host", type=str, default="0.0.0.0", help="Host IP to bind to" + "--host", + type=str, + default=get_localhost_ip(), + help="Host IP to bind to. Defaults to localhost.", ) parser.add_argument("-p", "--port", type=int, default=10001, help="Port to bind to") parser.add_argument( diff --git a/python/ray/util/rpdb.py b/python/ray/util/rpdb.py index 9610e070cbf9..6958df908ced 100644 --- a/python/ray/util/rpdb.py +++ b/python/ray/util/rpdb.py @@ -18,7 +18,7 @@ from typing import Callable import ray -from ray._common.network_utils import build_address, is_ipv6 +from ray._common.network_utils import build_address, get_localhost_ip, is_ipv6 from ray._private import ray_constants from ray.experimental.internal_kv import _internal_kv_del, _internal_kv_put from ray.util.annotations import DeveloperAPI @@ -230,9 +230,9 @@ def _connect_ray_pdb( """ if debugger_external: assert not host, "Cannot specify both host and debugger_external" - host = "0.0.0.0" + host = ray._private.worker.global_worker.node_ip_address elif host is None: - host = os.environ.get("REMOTE_PDB_HOST", "127.0.0.1") + host = os.environ.get("REMOTE_PDB_HOST") or get_localhost_ip() if port is None: port = int(os.environ.get("REMOTE_PDB_PORT", "0")) if quiet is None: diff --git a/python/ray/util/spark/cluster_init.py b/python/ray/util/spark/cluster_init.py index 9c5c696fa459..e652d54a06de 100644 --- a/python/ray/util/spark/cluster_init.py +++ b/python/ray/util/spark/cluster_init.py @@ -551,7 +551,7 @@ def _setup_ray_cluster( port_exclude_list.append(ray_dashboard_agent_port) dashboard_options = [ - "--dashboard-host=0.0.0.0", + f"--dashboard-host={ray_head_ip}", f"--dashboard-port={ray_dashboard_port}", f"--dashboard-agent-listen-port={ray_dashboard_agent_port}", ] diff --git a/src/ray/core_worker/core_worker_process.cc b/src/ray/core_worker/core_worker_process.cc index 80af284e73b6..087bd2e5ab3b 100644 --- a/src/ray/core_worker/core_worker_process.cc +++ b/src/ray/core_worker/core_worker_process.cc @@ -245,10 +245,8 @@ std::shared_ptr CoreWorkerProcessImpl::CreateCoreWorker( std::make_shared(std::move(raylet_address), *client_call_manager_, /*raylet_unavailable_timeout_callback=*/[] {}); - auto core_worker_server = - std::make_unique(WorkerTypeString(options.worker_type), - assigned_port, - options.node_ip_address == "127.0.0.1"); + auto core_worker_server = std::make_unique( + WorkerTypeString(options.worker_type), assigned_port, options.node_ip_address); // Start RPC server after all the task receivers are properly initialized and we have // our assigned port from the raylet. core_worker_server->RegisterService( @@ -853,8 +851,11 @@ CoreWorkerProcessImpl::CoreWorkerProcessImpl(const CoreWorkerOptions &options) // Initialize metrics agent client. // Port > 0 means valid port, -1 means metrics agent not available (minimal install). if (options_.metrics_agent_port > 0) { - metrics_agent_client_ = std::make_unique( - "127.0.0.1", options_.metrics_agent_port, io_service_, *client_call_manager_); + metrics_agent_client_ = + std::make_unique(GetLocalhostIP(), + options_.metrics_agent_port, + io_service_, + *client_call_manager_); metrics_agent_client_->WaitForServerReady([this](const Status &server_status) { if (server_status.ok()) { stats::ConnectOpenCensusExporter(options_.metrics_agent_port); diff --git a/src/ray/core_worker/tests/core_worker_test.cc b/src/ray/core_worker/tests/core_worker_test.cc index fec2218feb6e..504aa7931f2f 100644 --- a/src/ray/core_worker/tests/core_worker_test.cc +++ b/src/ray/core_worker/tests/core_worker_test.cc @@ -52,6 +52,7 @@ #include "ray/pubsub/publisher.h" #include "ray/raylet_ipc_client/fake_raylet_ipc_client.h" #include "ray/raylet_rpc_client/fake_raylet_client.h" +#include "ray/util/network_util.h" namespace ray { namespace core { @@ -116,8 +117,8 @@ class CoreWorkerTest : public ::testing::Test { auto service_handler = std::make_unique(); auto worker_context = std::make_unique( WorkerType::WORKER, WorkerID::FromRandom(), JobID::FromInt(1)); - auto core_worker_server = - std::make_unique(WorkerTypeString(options.worker_type), 0, true); + auto core_worker_server = std::make_unique( + WorkerTypeString(options.worker_type), 0, GetLocalhostIP()); core_worker_server->RegisterService( std::make_unique( io_service_, *service_handler, /*max_active_rpcs_per_handler_=*/-1), diff --git a/src/ray/gcs/gcs_server.cc b/src/ray/gcs/gcs_server.cc index b0f0fc26aebb..3326e656c935 100644 --- a/src/ray/gcs/gcs_server.cc +++ b/src/ray/gcs/gcs_server.cc @@ -68,7 +68,7 @@ GcsServer::GcsServer(const ray::gcs::GcsServerConfig &config, storage_type_(GetStorageType()), rpc_server_(config.grpc_server_name, config.grpc_server_port, - config.node_ip_address == "127.0.0.1", + config.node_ip_address, config.grpc_server_thread_num, /*keepalive_time_ms=*/RayConfig::instance().grpc_keepalive_time_ms()), client_call_manager_(main_service, @@ -991,7 +991,7 @@ void GcsServer::InitMetricsExporter(int metrics_agent_port) { event_aggregator_client_->Connect(metrics_agent_port); metrics_agent_client_ = std::make_unique( - "127.0.0.1", + GetLocalhostIP(), metrics_agent_port, io_context_provider_.GetDefaultIOContext(), client_call_manager_); diff --git a/src/ray/gcs/tests/gcs_health_check_manager_test.cc b/src/ray/gcs/tests/gcs_health_check_manager_test.cc index 8ba51f2723f3..0510fc5e3824 100644 --- a/src/ray/gcs/tests/gcs_health_check_manager_test.cc +++ b/src/ray/gcs/tests/gcs_health_check_manager_test.cc @@ -90,7 +90,8 @@ class GcsHealthCheckManagerTest : public ::testing::Test { auto node_id = NodeID::FromRandom(); auto port = GetFreePort(); RAY_LOG(INFO) << "Get port " << port; - auto server = std::make_shared(node_id.Hex(), port, true); + auto server = + std::make_shared(node_id.Hex(), port, GetLocalhostIP()); auto channel = grpc::CreateChannel(BuildAddress("localhost", port), grpc::InsecureChannelCredentials()); diff --git a/src/ray/gcs_rpc_client/global_state_accessor.cc b/src/ray/gcs_rpc_client/global_state_accessor.cc index c236d8c6099b..b332fa803462 100644 --- a/src/ray/gcs_rpc_client/global_state_accessor.cc +++ b/src/ray/gcs_rpc_client/global_state_accessor.cc @@ -23,6 +23,7 @@ #include #include "ray/common/asio/instrumented_io_context.h" +#include "ray/util/network_util.h" #include "ray/util/time.h" namespace ray { @@ -474,7 +475,7 @@ ray::Status GlobalStateAccessor::GetNodeToConnectForDriver( timeout_ms, rpc::GcsNodeInfo::ALIVE, {selector})); } if (node_infos.empty() && node_ip_address == gcs_address) { - selector.set_node_ip_address("127.0.0.1"); + selector.set_node_ip_address(GetLocalhostIP()); { absl::ReaderMutexLock lock(&mutex_); auto timeout_ms = diff --git a/src/ray/object_manager/object_manager.cc b/src/ray/object_manager/object_manager.cc index df081b1eca06..5539f3e37b9c 100644 --- a/src/ray/object_manager/object_manager.cc +++ b/src/ray/object_manager/object_manager.cc @@ -27,6 +27,7 @@ #include "ray/object_manager/plasma/store_runner.h" #include "ray/object_manager/spilled_object_reader.h" #include "ray/util/exponential_backoff.h" +#include "ray/util/network_util.h" namespace ray { @@ -87,7 +88,7 @@ ObjectManager::ObjectManager( rpc_service_(rpc_service), object_manager_server_("ObjectManager", config_.object_manager_port, - config_.object_manager_address == "127.0.0.1", + config_.object_manager_address, config_.rpc_service_threads_number), client_call_manager_(main_service, /*record_stats=*/true, diff --git a/src/ray/pubsub/tests/python_gcs_subscriber_auth_test.cc b/src/ray/pubsub/tests/python_gcs_subscriber_auth_test.cc index 1b9124fa318d..b247d0a79cd4 100644 --- a/src/ray/pubsub/tests/python_gcs_subscriber_auth_test.cc +++ b/src/ray/pubsub/tests/python_gcs_subscriber_auth_test.cc @@ -25,6 +25,7 @@ #include "ray/rpc/authentication/authentication_token_loader.h" #include "ray/rpc/grpc_server.h" #include "ray/util/env.h" +#include "ray/util/network_util.h" #include "src/ray/protobuf/gcs_service.grpc.pb.h" namespace ray { @@ -118,7 +119,7 @@ class PythonGcsSubscriberAuthTest : public ::testing::Test { server_ = std::make_unique("test-gcs-server", 0, // Random port - true, + GetLocalhostIP(), 1, 7200000, auth_token); diff --git a/src/ray/raylet/main.cc b/src/ray/raylet/main.cc index fa13b86c966c..4e346501bd51 100644 --- a/src/ray/raylet/main.cc +++ b/src/ray/raylet/main.cc @@ -52,6 +52,7 @@ #include "ray/stats/tag_defs.h" #include "ray/util/cmd_line_utils.h" #include "ray/util/event.h" +#include "ray/util/network_util.h" #include "ray/util/process.h" #include "ray/util/raii.h" #include "ray/util/stream_redirection.h" @@ -1066,8 +1067,11 @@ int main(int argc, char *argv[]) { // -1 means metrics agent is not available (minimal install). int actual_metrics_agent_port = node_manager->GetMetricsAgentPort(); if (actual_metrics_agent_port > 0) { - metrics_agent_client = std::make_unique( - "127.0.0.1", actual_metrics_agent_port, main_service, *client_call_manager); + metrics_agent_client = + std::make_unique(ray::GetLocalhostIP(), + actual_metrics_agent_port, + main_service, + *client_call_manager); metrics_agent_client->WaitForServerReady( [actual_metrics_agent_port](const ray::Status &server_status) { if (server_status.ok()) { diff --git a/src/ray/raylet/node_manager.cc b/src/ray/raylet/node_manager.cc index c73d63309819..7f5d58f18d3c 100644 --- a/src/ray/raylet/node_manager.cc +++ b/src/ray/raylet/node_manager.cc @@ -214,9 +214,8 @@ NodeManager::NodeManager( std::chrono::milliseconds(delay_ms))); }), runtime_env_agent_port_(config.runtime_env_agent_port), - node_manager_server_("NodeManager", - config.node_manager_port, - config.node_manager_address == "127.0.0.1"), + node_manager_server_( + "NodeManager", config.node_manager_port, config.node_manager_address), local_object_manager_(local_object_manager), leased_workers_(leased_workers), local_gc_interval_ns_(RayConfig::instance().local_gc_interval_s() * 1e9), diff --git a/src/ray/raylet/tests/node_manager_test.cc b/src/ray/raylet/tests/node_manager_test.cc index 884e710ead16..056a98966024 100644 --- a/src/ray/raylet/tests/node_manager_test.cc +++ b/src/ray/raylet/tests/node_manager_test.cc @@ -48,6 +48,7 @@ #include "ray/raylet/tests/util.h" #include "ray/raylet_rpc_client/fake_raylet_client.h" #include "ray/rpc/utils.h" +#include "ray/util/network_util.h" namespace ray::raylet { using ::testing::_; @@ -314,6 +315,7 @@ class NodeManagerTest : public ::testing::Test { })"); NodeManagerConfig node_manager_config{}; + node_manager_config.node_manager_address = GetLocalhostIP(); node_manager_config.maximum_startup_concurrency = 1; node_manager_config.store_socket_name = "test_store_socket"; node_manager_config.resource_config = ResourceSet( diff --git a/src/ray/rpc/authentication/tests/grpc_auth_token_tests.cc b/src/ray/rpc/authentication/tests/grpc_auth_token_tests.cc index b32056513e1c..a85b1a98aeca 100644 --- a/src/ray/rpc/authentication/tests/grpc_auth_token_tests.cc +++ b/src/ray/rpc/authentication/tests/grpc_auth_token_tests.cc @@ -24,6 +24,7 @@ #include "ray/rpc/grpc_server.h" #include "ray/rpc/tests/grpc_test_common.h" #include "ray/util/env.h" +#include "ray/util/network_util.h" #include "src/ray/protobuf/test_service.grpc.pb.h" namespace ray { @@ -73,7 +74,8 @@ class TestGrpcServerClientTokenAuthFixture : public ::testing::Test { // Explicitly set empty token (no auth required) server_auth_token = std::make_shared(""); } - grpc_server_.reset(new GrpcServer("test", 0, true, 1, 7200000, server_auth_token)); + grpc_server_.reset( + new GrpcServer("test", 0, GetLocalhostIP(), 1, 7200000, server_auth_token)); grpc_server_->RegisterService( std::make_unique(handler_io_service_, test_service_handler_), false); diff --git a/src/ray/rpc/event_aggregator_client.h b/src/ray/rpc/event_aggregator_client.h index 643de9ac9673..643178f38c93 100644 --- a/src/ray/rpc/event_aggregator_client.h +++ b/src/ray/rpc/event_aggregator_client.h @@ -22,6 +22,7 @@ #include "ray/rpc/grpc_client.h" #include "ray/util/logging.h" +#include "ray/util/network_util.h" #include "src/ray/protobuf/events_event_aggregator_service.grpc.pb.h" #include "src/ray/protobuf/events_event_aggregator_service.pb.h" @@ -62,7 +63,7 @@ class EventAggregatorClientImpl : public EventAggregatorClient { void Connect(const int port) override { grpc_client_ = std::make_unique>( - "127.0.0.1", port, *client_call_manager_); + GetLocalhostIP(), port, *client_call_manager_); } VOID_RPC_CLIENT_METHOD(rpc::events::EventAggregatorService, diff --git a/src/ray/rpc/grpc_server.cc b/src/ray/rpc/grpc_server.cc index 5809cc005783..4aff0c4dc888 100644 --- a/src/ray/rpc/grpc_server.cc +++ b/src/ray/rpc/grpc_server.cc @@ -64,8 +64,7 @@ void GrpcServer::Shutdown() { void GrpcServer::Run() { uint32_t specified_port = port_; - std::string server_address = - BuildAddress((listen_to_localhost_only_ ? "127.0.0.1" : "0.0.0.0"), port_); + std::string server_address = BuildAddress(bind_address_, port_); grpc::ServerBuilder builder; // Disable the SO_REUSEPORT option. We don't need it in ray. If the option is enabled // (default behavior in grpc), we may see multiple workers listen on the same port and diff --git a/src/ray/rpc/grpc_server.h b/src/ray/rpc/grpc_server.h index ebf47c05a497..35e164fc3626 100644 --- a/src/ray/rpc/grpc_server.h +++ b/src/ray/rpc/grpc_server.h @@ -90,16 +90,17 @@ class GrpcServer { /// \param[in] name Name of this server, used for logging and debugging purpose. /// \param[in] port The port to bind this server to. If it's 0, a random available port /// will be chosen. + /// \param[in] bind_address The IP address to bind to. /// GrpcServer(std::string name, const uint32_t port, - bool listen_to_localhost_only, + std::string bind_address, int num_threads = 1, int64_t keepalive_time_ms = 7200000, /*2 hours, grpc default*/ std::shared_ptr auth_token = nullptr) : name_(std::move(name)), port_(port), - listen_to_localhost_only_(listen_to_localhost_only), + bind_address_(std::move(bind_address)), is_shutdown_(true), num_threads_(num_threads), keepalive_time_ms_(keepalive_time_ms) { @@ -166,9 +167,8 @@ class GrpcServer { /// Port of this server. int port_; - /// Listen to localhost (127.0.0.1) only if it's true, otherwise listen to all network - /// interfaces (0.0.0.0) - const bool listen_to_localhost_only_; + /// The IP address to bind this server to. + const std::string bind_address_; /// Token representing ID of this cluster. ClusterID cluster_id_; diff --git a/src/ray/rpc/tests/grpc_server_client_test.cc b/src/ray/rpc/tests/grpc_server_client_test.cc index 09a168eac9b5..ab49dae03a01 100644 --- a/src/ray/rpc/tests/grpc_server_client_test.cc +++ b/src/ray/rpc/tests/grpc_server_client_test.cc @@ -20,6 +20,7 @@ #include "ray/rpc/grpc_client.h" #include "ray/rpc/grpc_server.h" #include "ray/rpc/tests/grpc_test_common.h" +#include "ray/util/network_util.h" #include "src/ray/protobuf/test_service.grpc.pb.h" namespace ray { @@ -35,7 +36,7 @@ class TestGrpcServerClientFixture : public ::testing::Test { handler_io_service_work_(handler_io_service_.get_executor()); handler_io_service_.run(); }); - grpc_server_.reset(new GrpcServer("test", 0, true)); + grpc_server_.reset(new GrpcServer("test", 0, GetLocalhostIP())); grpc_server_->RegisterService( std::make_unique(handler_io_service_, test_service_handler_), false); diff --git a/src/ray/stats/metric_exporter.cc b/src/ray/stats/metric_exporter.cc index 46998bb8f3f1..0f484e318941 100644 --- a/src/ray/stats/metric_exporter.cc +++ b/src/ray/stats/metric_exporter.cc @@ -16,6 +16,8 @@ #include +#include "ray/util/network_util.h" + namespace ray { namespace stats { @@ -59,10 +61,10 @@ void OpenCensusProtoExporter::Connect(int port) { << "Cannot Connect without io_service. Use the lazy loading constructor."; client_call_manager_ = std::make_unique( *io_service_, /*record_stats=*/true, /*local_address=*/"always local"); - // The MetricsAgentClient is always started with 127.0.0.1 so we don't need to pass + // The MetricsAgentClient is always started with local address so we don't need to pass // the local address to this client call manager to tell it's local. client_ = std::make_shared( - "127.0.0.1", port, *io_service_, *client_call_manager_); + GetLocalhostIP(), port, *io_service_, *client_call_manager_); } /// Hack. We want to add GlobalTags to all our metrics, but gRPC OpenCencus plugin is not diff --git a/src/ray/util/network_util.cc b/src/ray/util/network_util.cc index 53d3cb2df19b..881886c9ffd1 100644 --- a/src/ray/util/network_util.cc +++ b/src/ray/util/network_util.cc @@ -254,6 +254,40 @@ std::shared_ptr> ParseURL(std::str return result; } +std::string GetLocalhostIP() { + static const std::string localhost_ip = []() { + // Try IPv4 first, then IPv6 localhost resolution + for (auto family : {boost::asio::ip::tcp::v4(), boost::asio::ip::tcp::v6()}) { + try { + boost::asio::io_context io_context; + boost::asio::ip::tcp::resolver resolver(io_context); + boost::asio::ip::tcp::resolver::query query(family, "localhost", ""); + auto endpoints = resolver.resolve(query); + if (endpoints != boost::asio::ip::tcp::resolver::iterator()) { + return endpoints->endpoint().address().to_string(); + } + } catch (const boost::system::system_error &) { + // Continue to next family + continue; + } + } + // Final fallback to IPv4 loopback + return std::string("127.0.0.1"); + }(); + return localhost_ip; +} + +std::string GetAllInterfacesIP() { + static const std::string all_interfaces_ip = []() { + std::string localhost = GetLocalhostIP(); + if (localhost == "::1" || localhost.find(':') != std::string::npos) { + return std::string("::"); + } + return std::string("0.0.0.0"); + }(); + return all_interfaces_ip; +} + std::string GetNodeIpAddressFromPerspective(const std::optional &address) { std::vector> test_addresses; if (address.has_value()) { diff --git a/src/ray/util/network_util.h b/src/ray/util/network_util.h index 1e80ce9ea2eb..236d08489556 100644 --- a/src/ray/util/network_util.h +++ b/src/ray/util/network_util.h @@ -64,6 +64,16 @@ std::string GetNodeIpAddressFromPerspective( /// \return true if the host is resolved to IPv6, false if IPv4. bool IsIPv6(const std::string &host); +/// Get localhost loopback IP with IPv4/IPv6 support. +/// Tries to resolve "localhost" to IPv4 first, then IPv6. +/// \return The localhost loopback IP (e.g., "127.0.0.1" or "::1"). +std::string GetLocalhostIP(); + +/// Get the IP address to bind to all network interfaces. +/// Tries to resolve "localhost" to IPv4 first, then IPv6. +/// \return "0.0.0.0" for IPv4 or "::" for IPv6 +std::string GetAllInterfacesIP(); + /// Check whether the given port is available for the specified address family. /// Notice, the check could be non-authentic if there're concurrent port assignments. /// \param family The address family to check (AF_INET for IPv4, AF_INET6 for IPv6).