Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions python/ray/_private/authentication/authentication_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,15 @@ def is_token_auth_enabled() -> bool:
"""Check if token authentication is enabled.

Returns:
bool: True if AUTH_MODE is set to "token", False otherwise
bool: True if AUTH_MODE is set to "token" or "k8s", False otherwise
"""
if not _RAYLET_AVAILABLE:
return False
return get_authentication_mode() == AuthenticationMode.TOKEN

return get_authentication_mode() in {
AuthenticationMode.TOKEN,
AuthenticationMode.K8S,
}


def validate_request_token(auth_header: str) -> bool:
Expand Down
2 changes: 2 additions & 0 deletions python/ray/dashboard/http_server_head.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ async def get_authentication_mode(self, req) -> aiohttp.web.Response:
mode = get_authentication_mode()
if mode == AuthenticationMode.TOKEN:
mode_str = "token"
elif mode == AuthenticationMode.K8S:
mode_str = "k8s"
else:
mode_str = "disabled"

Expand Down
7 changes: 7 additions & 0 deletions python/ray/includes/rpc_token_authentication.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ cdef extern from "ray/rpc/authentication/authentication_mode.h" namespace "ray::
cdef enum CAuthenticationMode "ray::rpc::AuthenticationMode":
DISABLED "ray::rpc::AuthenticationMode::DISABLED"
TOKEN "ray::rpc::AuthenticationMode::TOKEN"
K8S "ray::rpc::AuthenticationMode::K8S"

CAuthenticationMode GetAuthenticationMode()

Expand All @@ -28,3 +29,9 @@ cdef extern from "ray/rpc/authentication/authentication_token_loader.h" namespac
c_bool HasToken()
void ResetCache()
optional[CAuthenticationToken] GetToken()

cdef extern from "ray/rpc/authentication/authentication_token_validator.h" namespace "ray::rpc" nogil:
cdef cppclass CAuthenticationTokenValidator "ray::rpc::AuthenticationTokenValidator":
@staticmethod
CAuthenticationTokenValidator& instance()
c_bool ValidateToken(const optional[CAuthenticationToken]& expected_token, const CAuthenticationToken& provided_token)
29 changes: 19 additions & 10 deletions python/ray/includes/rpc_token_authentication.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -3,48 +3,57 @@ from ray.includes.rpc_token_authentication cimport (
GetAuthenticationMode,
CAuthenticationToken,
CAuthenticationTokenLoader,
CAuthenticationTokenValidator,
)
from ray._private.authentication.authentication_constants import AUTHORIZATION_HEADER_NAME
import logging

logger = logging.getLogger(__name__)


# Authentication mode enum exposed to Python.
# Each attribute re-exports the matching CAuthenticationMode value (the C++
# ray::rpc::AuthenticationMode enum) so Python callers can compare the result
# of get_authentication_mode() against it.
class AuthenticationMode:
    # Authentication is off.
    DISABLED = CAuthenticationMode.DISABLED
    # The provided token is compared against the expected cluster token.
    TOKEN = CAuthenticationMode.TOKEN
    # The provided token is validated against the Kubernetes API.
    K8S = CAuthenticationMode.K8S


def get_authentication_mode():
"""Get the current authentication mode.

Returns:
AuthenticationMode enum value (DISABLED or TOKEN)
AuthenticationMode enum value (DISABLED or TOKEN or K8S)
"""
return GetAuthenticationMode()


def validate_authentication_token(provided_token: str) -> bool:
"""Validate provided authentication token against expected token.
"""Validate provided authentication token.

For TOKEN mode, compares against the expected token.
For K8S mode, validates against the Kubernetes API.

Args:
provided_token: Full authorization header value (e.g., "Bearer <token>")

Returns:
bool: True if tokens match, False otherwise
bool: True if token is valid, False otherwise
"""
# Get expected token from loader
cdef optional[CAuthenticationToken] expected_opt = CAuthenticationTokenLoader.instance().GetToken()
cdef optional[CAuthenticationToken] expected_opt
cdef CAuthenticationToken provided

if not expected_opt.has_value():
return False
if get_authentication_mode() == CAuthenticationMode.TOKEN:
expected_opt = CAuthenticationTokenLoader.instance().GetToken()
if not expected_opt.has_value():
return False

# Parse provided token from Bearer format
cdef CAuthenticationToken provided = CAuthenticationToken.FromMetadata(provided_token.encode())
provided = CAuthenticationToken.FromMetadata(provided_token.encode())

if provided.empty():
return False

# Use constant-time comparison from C++
return expected_opt.value().Equals(provided)
return CAuthenticationTokenValidator.instance().ValidateToken(expected_opt, provided)


class AuthenticationTokenLoader:
Expand Down
3 changes: 3 additions & 0 deletions src/ray/rpc/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,10 @@ ray_cc_library(
"//src/ray/common:id",
"//src/ray/common:ray_config",
"//src/ray/common:status",
"//src/ray/rpc/authentication:authentication_mode",
"//src/ray/rpc/authentication:authentication_token",
"//src/ray/rpc/authentication:authentication_token_loader",
"//src/ray/rpc/authentication:authentication_token_validator",
"//src/ray/stats:stats_metric",
"@com_github_grpc_grpc//:grpc++",
],
Expand Down
34 changes: 34 additions & 0 deletions src/ray/rpc/authentication/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,27 @@ ray_cc_library(
],
)

# Header-only target that exposes k8s_constants.h to other authentication
# libraries (presumably the home of constants such as k8s::kK8sSaTokenPath,
# which the token loader reads -- confirm in the header).
ray_cc_library(
    name = "k8s_constants",
    hdrs = ["k8s_constants.h"],
    visibility = ["//visibility:public"],
)

# Kubernetes helper library (k8s_util.cc / k8s_util.h) used by the token
# validator for K8S-mode validation.
# NOTE(review): the Boost.Asio SSL, Boost.Beast and nlohmann_json deps are
# presumably for HTTPS calls to the Kubernetes API server and JSON parsing of
# its responses -- confirm in k8s_util.cc.
ray_cc_library(
    name = "k8s_util",
    srcs = ["k8s_util.cc"],
    hdrs = ["k8s_util.h"],
    visibility = ["//visibility:public"],
    deps = [
        ":authentication_token",
        ":k8s_constants",
        "//src/ray/util:logging",
        "@boost//:asio_ssl",
        "@boost//:beast",
        "@nlohmann_json",
    ],
)

ray_cc_library(
name = "authentication_token_loader",
srcs = ["authentication_token_loader.cc"],
Expand All @@ -29,6 +50,7 @@ ray_cc_library(
deps = [
":authentication_mode",
":authentication_token",
":k8s_constants",
"//src/ray/util:logging",
],
)
Expand All @@ -45,3 +67,15 @@ ray_cc_library(
"@com_github_grpc_grpc//:grpc++",
],
)

# Validator for authentication tokens: compares against the expected token in
# TOKEN mode and delegates to :k8s_util in K8S mode.
ray_cc_library(
    name = "authentication_token_validator",
    srcs = ["authentication_token_validator.cc"],
    hdrs = ["authentication_token_validator.h"],
    visibility = ["//visibility:public"],
    deps = [
        ":authentication_mode",
        ":authentication_token",
        ":k8s_util",
        # authentication_token_validator.cc includes ray/util/logging.h and
        # uses RAY_CHECK / RAY_LOG directly, so declare the dependency here
        # instead of relying on it arriving transitively through :k8s_util.
        "//src/ray/util:logging",
    ],
)
7 changes: 7 additions & 0 deletions src/ray/rpc/authentication/authentication_mode.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,17 @@ AuthenticationMode GetAuthenticationMode() {

if (auth_mode_lower == "token") {
return AuthenticationMode::TOKEN;
} else if (auth_mode_lower == "k8s") {
return AuthenticationMode::K8S;
} else {
return AuthenticationMode::DISABLED;
}
}

bool RequiresTokenAuthentication() {
AuthenticationMode auth_mode = GetAuthenticationMode();
return (auth_mode == AuthenticationMode::TOKEN || auth_mode == AuthenticationMode::K8S);
}

} // namespace rpc
} // namespace ray
5 changes: 5 additions & 0 deletions src/ray/rpc/authentication/authentication_mode.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,17 @@ namespace rpc {
enum class AuthenticationMode {
DISABLED,
TOKEN,
K8S,
};

/// Get the authentication mode from the RayConfig.
/// \return The authentication mode enum value. Returns AuthenticationMode::DISABLED if
/// the authentication mode is not set or is invalid.
AuthenticationMode GetAuthenticationMode();

/// Check whether the configured authentication mode requires token authentication.
/// \return true if the mode is TOKEN or K8S, false otherwise.
bool RequiresTokenAuthentication();

} // namespace rpc
} // namespace ray
15 changes: 15 additions & 0 deletions src/ray/rpc/authentication/authentication_token.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,14 @@ class AuthenticationToken {
return std::string(secret_.begin(), secret_.end());
}

/// Get token hash
/// @return Hash of the token value
std::size_t ToHash() const {
// TODO(andrewsykim): consider using a more secure hashing algorithm like SHA256
// before documenting this feature in Ray docs.
return std::hash<std::string>()(std::string(secret_.begin(), secret_.end()));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a TODO here to use a more secure hashing algorithm? Or, if possible, I think we should start with SHA256 and then update it to a better one based on the security team's suggestion.

I am just worried that this will get shipped to users before we update to use a more secure algorithm (though the impact of this is probably not that high)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

noted, from discussion with Edward this change is going to be undocumented for a couple of releases as we test it. I added a TODO to update the hashing algorithm before we document this feature. Please let me know what suggestion you get from your security team.

Copy link
Member Author

@andrewsykim andrewsykim Nov 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the record, I tried to update the implementation to use SHA256, but there's a definition conflict between boringssl and the ray/third_party/sha256:

    In file included from external/boringssl/src/include/openssl/conf.h:60,
                     from external/boost/boost/asio/ssl/detail/openssl_types.hpp:23,
                     from external/boost/boost/asio/ssl/error.hpp:20,
                     from src/ray/rpc/authentication/k8s_util.cc:19:
    external/boringssl/src/include/openssl/base.h:353:32: error: conflicting declaration 'typedef struct sha256_state_st SHA256_CTX'

So this change will require renaming some definitions in ray/third_party/sha256 or just using the openssl implementation. Will probably stick to the former since there's only one other caller to the third_party lib in src/ray/common/id.cc.

Opened a separate PR that includes the changes in ray/third_party/sha256 c3a8ea3

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#58497 (comment) we can go with Blake3

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Noted, will update #58593 to use BLAKE3

}

/// Create AuthenticationToken from gRPC metadata value
/// Strips "Bearer " prefix and creates token object
/// @param metadata_value The raw value from server metadata (should include "Bearer "
Expand Down Expand Up @@ -173,5 +181,12 @@ class AuthenticationToken {
}
};

// Hash function for AuthenticationToken
struct AuthenticationTokenHash {
std::size_t operator()(const AuthenticationToken &token) const {
return token.ToHash();
}
};
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Timing Side-Channel Exposes Token Secrets

The AuthenticationTokenHash uses ToValue() which exposes the raw token in the hash computation. This defeats the constant-time comparison security of AuthenticationToken and could leak token information through timing side-channels when tokens are used as hash map keys. The hash function should use a timing-safe approach or avoid exposing the raw token value.

Fix in Cursor Fix in Web

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sampan-s-nayak do you know if this is an actual concern when hashing?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If tokens are all of a fixed length then we may not need to worry about timing attacks (we haven't finalised the token generation logic yet, so for the time being we can just add a note here stating our assumption that we expect all tokens to be the same length).

but instead of std::hash we should consider a better and more secure algorithm like SHA256 (@edoakes I think we should discuss this a bit with our security team as well).

and instead of retrieving the raw token outside the authenticationToken class and generating hash I would instead prefer exposing a toHash() function in AuthenticationToken() class and just call that within AuthenticationTokenHash() struct.

Copy link
Member Author

@andrewsykim andrewsykim Nov 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a ToHash method to AuthenticationToken and called it in AuthenticationTokenHash(). I'm going to keep the hashing implementation to use std::hash, but happy to update to SHA256 in a follow-up PR after you consult with your security team.

Copy link
Contributor

@sampan-s-nayak sampan-s-nayak Nov 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@richo-anyscale, who is part of the security team, suggested that we use BLAKE3. We can switch to it in this PR if it is straightforward, or do it in a separate PR.


} // namespace rpc
} // namespace ray
24 changes: 19 additions & 5 deletions src/ray/rpc/authentication/authentication_token_loader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <string>
#include <utility>

#include "ray/rpc/authentication/k8s_constants.h"
#include "ray/util/logging.h"

#ifdef _WIN32
Expand Down Expand Up @@ -47,8 +48,8 @@ std::optional<AuthenticationToken> AuthenticationTokenLoader::GetToken() {
return cached_token_;
}

// If token auth is not enabled, return std::nullopt
if (GetAuthenticationMode() != AuthenticationMode::TOKEN) {
// If token or k8s auth is not enabled, return std::nullopt
if (!RequiresTokenAuthentication()) {
cached_token_ = std::nullopt;
return std::nullopt;
}
Expand Down Expand Up @@ -77,8 +78,8 @@ bool AuthenticationTokenLoader::HasToken() {
return !cached_token_->empty();
}

// If token auth is not enabled, no token needed
if (GetAuthenticationMode() != AuthenticationMode::TOKEN) {
// If token or k8s auth is not enabled, no token needed
if (!RequiresTokenAuthentication()) {
cached_token_ = std::nullopt;
return false;
}
Expand Down Expand Up @@ -135,7 +136,20 @@ AuthenticationToken AuthenticationTokenLoader::LoadTokenFromSources() {
}
}

// Precedence 3: Default token path ~/.ray/auth_token
// Precedence 3 (auth_mode=k8s only): Load Kubernetes service account token
if (GetAuthenticationMode() == AuthenticationMode::K8S) {
std::string token_str = TrimWhitespace(ReadTokenFromFile(k8s::kK8sSaTokenPath));
if (!token_str.empty()) {
RAY_LOG(DEBUG)
<< "Loaded authentication token from Kubernetes service account path: "
<< k8s::kK8sSaTokenPath;
return AuthenticationToken(token_str);
}
RAY_LOG(DEBUG) << "Kubernetes service account token not found or empty at: "
<< k8s::kK8sSaTokenPath;
}

// Precedence 4: Default token path ~/.ray/auth_token
std::string default_path = GetDefaultTokenPath();
std::string token_str = TrimWhitespace(ReadTokenFromFile(default_path));
if (!token_str.empty()) {
Expand Down
86 changes: 86 additions & 0 deletions src/ray/rpc/authentication/authentication_token_validator.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright 2025 The Ray Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions
// and limitations under the License.

#include "ray/rpc/authentication/authentication_token_validator.h"

#include "ray/rpc/authentication/authentication_mode.h"
#include "ray/rpc/authentication/k8s_util.h"
#include "ray/util/logging.h"

namespace ray {
namespace rpc {

// Lifetime of a cached K8s token validation result. ValidateToken() stores
// entries with expiration = now + kCacheTTL and drops them on the first lookup
// after that point, forcing the token to be validated again.
const std::chrono::minutes kCacheTTL(5);

AuthenticationTokenValidator &AuthenticationTokenValidator::instance() {
static AuthenticationTokenValidator instance;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: we can probably avoid the singleton pattern here and just define the helper function directly in this file.

Copy link
Member Author

@andrewsykim andrewsykim Nov 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assumed we need singleton for the cache state, if we don't use singleton the function and cache would be static / global for the class.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sampan-s-nayak I kept the singleton for now but I can fix this in a follow-up PR if you think it's worth doing

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assumed we need singleton for the cache state, if we don't use singleton the function and cache would be static / global for the class.

maybe we can create a singleton k8s token cache class and keep the validator as just a plain old function?

return instance;
}

bool AuthenticationTokenValidator::ValidateToken(
const std::optional<AuthenticationToken> &expected_token,
const AuthenticationToken &provided_token) {
if (GetAuthenticationMode() == AuthenticationMode::TOKEN) {
RAY_CHECK(expected_token.has_value() && !expected_token->empty())
<< "Ray token authentication is enabled but expected token is empty";

return expected_token->Equals(provided_token);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Empty Optional Dereference: Crash Hazard

In TOKEN mode, expected_token is dereferenced without checking if it has a value. While the current code flow may prevent this, the function accepts std::optional and should validate it before dereferencing to avoid potential crashes if called with an empty optional.

Fix in Cursor Fix in Web

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

regarding #58497 (comment), will probably add it back

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added:

RAY_CHECK(expected_token.has_value() && !expected_token->empty())
        << "Ray token authentication is enabled but expected token is empty";

}

if (GetAuthenticationMode() == AuthenticationMode::K8S) {
std::call_once(k8s::k8s_client_config_flag, k8s::InitK8sClientConfig);
if (!k8s::k8s_client_initialized) {
RAY_LOG(WARNING) << "Kubernetes client not initialized, K8s authentication failed.";
return false;
}

// Check cache first.
{
std::lock_guard<std::mutex> lock(k8s_token_cache_mutex_);
auto it = k8s_token_cache_.find(provided_token);
if (it != k8s_token_cache_.end()) {
if (std::chrono::steady_clock::now() < it->second.expiration) {
RAY_LOG(DEBUG) << "K8s token found in cache and is valid.";
return it->second.allowed;
} else {
RAY_LOG(DEBUG) << "K8s token in cache expired, removing from cache.";
k8s_token_cache_.erase(it);
}
}
}

bool is_allowed = false;
is_allowed = k8s::ValidateToken(provided_token);

// Only cache validated tokens for now. We don't want to invalidate a token
// due to unrelated errors from Kubernetes API server. This has the downside of
// causing more load if an unauthenticated client continues to make calls.
// TODO(andrewsykim): cache invalid tokens once k8s::ValidateToken can distinguish
// between invalid token errors and server errors.
if (is_allowed) {
std::lock_guard<std::mutex> lock(k8s_token_cache_mutex_);
k8s_token_cache_[provided_token] = {is_allowed,
std::chrono::steady_clock::now() + kCacheTTL};
RAY_LOG(DEBUG) << "K8s token validated and saved to cache.";
}

return is_allowed;
}

RAY_LOG(DEBUG) << "Authentication mode is disabled, token considered valid.";
return true;
}

} // namespace rpc
} // namespace ray
Loading