Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
9c98194
add external scaler enabled flag
harshit-anyscale Oct 15, 2025
a685eb7
fix tests
harshit-anyscale Oct 16, 2025
7c1ef8f
review changes
harshit-anyscale Oct 27, 2025
0f6b1b0
merge master
harshit-anyscale Oct 27, 2025
826b69c
merge master
harshit-anyscale Oct 27, 2025
60beb9f
review changes
harshit-anyscale Oct 27, 2025
dee83f1
fix bug
harshit-anyscale Oct 27, 2025
e4d315d
Merge branch 'master' into add-external-scaler-c3
harshit-anyscale Oct 27, 2025
9229b97
review changes
harshit-anyscale Oct 28, 2025
4d30ab8
review changes
harshit-anyscale Oct 28, 2025
0492aed
Merge branch 'master' into add-external-scaler-c3
harshit-anyscale Oct 28, 2025
c92bb27
fix tests
harshit-anyscale Oct 28, 2025
83fd9d1
changes in java side code
harshit-anyscale Oct 29, 2025
cf556db
fix java tests
harshit-anyscale Nov 8, 2025
209a1fd
review changes
harshit-anyscale Nov 8, 2025
c87d182
review changes
harshit-anyscale Nov 8, 2025
f7e2f94
add more tests
harshit-anyscale Nov 11, 2025
2fed452
add more tests
harshit-anyscale Nov 11, 2025
ebeb0d2
lint changes
harshit-anyscale Nov 11, 2025
fda546e
Merge branch 'master' into add-external-scaler-c3
harshit-anyscale Nov 11, 2025
539d486
indentation changes
harshit-anyscale Nov 11, 2025
dbe3c0f
fix tests
harshit-anyscale Nov 12, 2025
8a4b589
Merge branch 'master' into add-external-scaler-c3
harshit-anyscale Nov 12, 2025
80e8cef
remove setting default value
harshit-anyscale Nov 17, 2025
c6c1b4c
Merge branch 'master' into add-external-scaler-c3
harshit-anyscale Nov 17, 2025
49eb96a
review changes
harshit-anyscale Nov 24, 2025
810fbcf
merge master
harshit-anyscale Nov 24, 2025
c0aa403
lint changes
harshit-anyscale Nov 24, 2025
7838218
fix tests
harshit-anyscale Nov 24, 2025
ad52517
review changes
harshit-anyscale Nov 24, 2025
f7ef18f
Merge branch 'master' into add-external-scaler-c3
harshit-anyscale Nov 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions java/serve/src/main/java/io/ray/serve/api/Serve.java
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ public static Deployment getDeployment(String name) {
* @return A handle that can be used to call the application.
*/
public static DeploymentHandle run(Application target) {
return run(target, true, Constants.SERVE_DEFAULT_APP_NAME, null, null);
return run(target, true, Constants.SERVE_DEFAULT_APP_NAME, null, null, false);
}

/**
Expand All @@ -320,14 +320,17 @@ public static DeploymentHandle run(Application target) {
* cluster (it will delete all others).
* @param routePrefix Route prefix for HTTP requests. Defaults to '/'.
* @param config
* @param externalScalerEnabled If true, indicates that an external autoscaler will manage replica
* scaling for this application. Defaults to false.
* @return A handle that can be used to call the application.
*/
public static DeploymentHandle run(
Application target,
boolean blocking,
String name,
String routePrefix,
Map<String, String> config) {
Map<String, String> config,
boolean externalScalerEnabled) {

if (StringUtils.isBlank(name)) {
throw new RayServeException("Application name must a non-empty string.");
Expand Down Expand Up @@ -355,7 +358,13 @@ public static DeploymentHandle run(
: RandomStringUtils.randomAlphabetic(6));
}

client.deployApplication(name, routePrefix, deployments, ingressDeployment.getName(), blocking);
client.deployApplication(
name,
routePrefix,
deployments,
ingressDeployment.getName(),
blocking,
externalScalerEnabled);
return client.getDeploymentHandle(ingressDeployment.getName(), name, true);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import io.ray.serve.deployment.Deployment;
import io.ray.serve.deployment.DeploymentRoute;
import io.ray.serve.exception.RayServeException;
import io.ray.serve.generated.ApplicationArgs;
import io.ray.serve.generated.ApplicationStatus;
import io.ray.serve.generated.DeploymentArgs;
import io.ray.serve.generated.EndpointInfo;
Expand Down Expand Up @@ -166,13 +167,16 @@ public BaseActorHandle getController() {
* @param ingressDeploymentName name of the ingress deployment (the one that is exposed over
* HTTP).
* @param blocking Wait for the applications to be deployed or not.
* @param externalScalerEnabled If true, indicates that an external autoscaler will manage replica
* scaling for this application.
*/
public void deployApplication(
String name,
String routePrefix,
List<Deployment> deployments,
String ingressDeploymentName,
boolean blocking) {
boolean blocking,
boolean externalScalerEnabled) {

Object[] deploymentArgsArray = new Object[deployments.size()];

Expand All @@ -192,8 +196,13 @@ public void deployApplication(
deploymentArgsArray[i] = deploymentArgs.build().toByteArray();
}

ApplicationArgs.Builder applicationArgs =
ApplicationArgs.newBuilder().setExternalScalerEnabled(externalScalerEnabled);
byte[] applicationArgsBytes = applicationArgs.build().toByteArray();

((PyActorHandle) controller)
.task(PyActorMethod.of("deploy_application"), name, deploymentArgsArray)
.task(
PyActorMethod.of("deploy_application"), name, deploymentArgsArray, applicationArgsBytes)
.remote()
.get();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,100 @@ public void userConfigTest() {
Serve.getAppHandle(deploymentName).method("call").remote("6").result(), "echo_6_new");
// TODO update user config
}

@Test
public void externalScalerEnabledTest() throws IOException {
  // Scenario: the application is deployed with externalScalerEnabled=true, so a scale request
  // sent to the REST endpoint is expected to be accepted with HTTP 200.
  final String appName = "externalScalerApp";
  final String deploymentName = "exampleEcho";

  Application echoApp =
      Serve.deployment()
          .setName(deploymentName)
          .setDeploymentDef(ExampleEchoDeployment.class.getName())
          .setNumReplicas(1)
          .setUserConfig("_test")
          .bind("echo_");

  // Last argument (true) turns the external scaler flag on for this application.
  DeploymentHandle echoHandle = Serve.run(echoApp, true, appName, "/", null, true);

  // Sanity-check that the deployment is serving before attempting to scale it.
  Object echoed = echoHandle.method("call").remote("5").result();
  Assert.assertEquals(echoed, "echo_5_test");
  Assert.assertTrue((boolean) echoHandle.method("checkHealth").remote().result());

  // Build the POST that asks for two replicas of the deployment.
  HttpPost post =
      new HttpPost(
          String.format(
              "http://localhost:8265/api/v1/applications/%s/deployments/%s/scale",
              appName, deploymentName));
  post.setHeader("Content-Type", "application/json");
  post.setEntity(new StringEntity("{\"target_num_replicas\": 2}"));

  // NOTE(review): the client itself is never closed here; only the response is. Consider
  // managing the client with try-with-resources as well -- confirm against the HTTP library
  // version in use.
  HttpClient client = HttpClientBuilder.create().build();
  try (CloseableHttpResponse reply = (CloseableHttpResponse) client.execute(post)) {
    int status = reply.getCode();
    byte[] rawBody = EntityUtils.toByteArray(reply.getEntity());
    String body = new String(rawBody, StandardCharsets.UTF_8);

    // The request must be accepted and acknowledged.
    Assert.assertEquals(
        status,
        200,
        "Scaling should succeed when external_scaler_enabled=true. Response: " + body);
    Assert.assertTrue(
        body.contains("Scaling request received"),
        "Response should contain success message. Response: " + body);
  }
}

@Test
public void externalScalerDisabledTest() throws IOException {
  // Scenario: the application is deployed with externalScalerEnabled=false, so a scale request
  // sent to the REST endpoint is expected to be rejected with HTTP 412 (Precondition Failed)
  // and a body that mentions the external scaler not being enabled.
  final String appName = "normalApp";
  final String deploymentName = "exampleEcho";

  Application echoApp =
      Serve.deployment()
          .setName(deploymentName)
          .setDeploymentDef(ExampleEchoDeployment.class.getName())
          .setNumReplicas(1)
          .setUserConfig("_test")
          .bind("echo_");

  // Last argument (false) leaves the external scaler flag off for this application.
  DeploymentHandle echoHandle = Serve.run(echoApp, true, appName, "/", null, false);

  // Sanity-check that the deployment is serving before attempting to scale it.
  Object echoed = echoHandle.method("call").remote("7").result();
  Assert.assertEquals(echoed, "echo_7_test");
  Assert.assertTrue((boolean) echoHandle.method("checkHealth").remote().result());

  // Build the POST that asks for two replicas of the deployment.
  HttpPost post =
      new HttpPost(
          String.format(
              "http://localhost:8265/api/v1/applications/%s/deployments/%s/scale",
              appName, deploymentName));
  post.setHeader("Content-Type", "application/json");
  post.setEntity(new StringEntity("{\"target_num_replicas\": 2}"));

  // NOTE(review): the client itself is never closed here; only the response is. Consider
  // managing the client with try-with-resources as well -- confirm against the HTTP library
  // version in use.
  HttpClient client = HttpClientBuilder.create().build();
  try (CloseableHttpResponse reply = (CloseableHttpResponse) client.execute(post)) {
    int status = reply.getCode();
    byte[] rawBody = EntityUtils.toByteArray(reply.getEntity());
    String body = new String(rawBody, StandardCharsets.UTF_8);

    // The request must be refused with 412.
    Assert.assertEquals(
        status,
        412,
        "Scaling should fail with 412 when external_scaler_enabled=false. Response: " + body);

    // Either the flag name or the exception name is accepted as evidence of the right error.
    boolean mentionsScalerError =
        body.contains("external_scaler_enabled")
            || body.contains("ExternalScalerNotEnabledError");
    Assert.assertTrue(
        mentionsScalerError,
        "Response should contain external_scaler_enabled error message. Response: " + body);
  }
}
}
13 changes: 8 additions & 5 deletions python/ray/dashboard/modules/serve/serve_head.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,10 @@ def _create_json_response(self, data, status: int) -> Response:
@validate_endpoint()
async def scale_deployment(self, req: Request) -> Response:
from ray.serve._private.common import DeploymentID
from ray.serve._private.exceptions import DeploymentIsBeingDeletedError
from ray.serve._private.exceptions import (
DeploymentIsBeingDeletedError,
ExternalScalerNotEnabledError,
)
from ray.serve.schema import ScaleDeploymentRequest

# Extract path parameters
Expand Down Expand Up @@ -250,11 +253,11 @@ async def scale_deployment(self, req: Request) -> Response:
200,
)
except Exception as e:
if isinstance(e.cause, DeploymentIsBeingDeletedError):
if isinstance(
e.cause, (ExternalScalerNotEnabledError, DeploymentIsBeingDeletedError)
):
return self._create_json_response(
# From customer's viewpoint, the deployment is deleted instead of being deleted
# as they must have already executed the delete command
{"error": "Deployment is deleted"},
{"error": str(e)},
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Unhandled Local Exceptions in Ray Task Error Handling

The exception handler attempts to access e.cause without checking if e is a RayTaskError. Only RayTaskError exceptions have a cause attribute. If a local exception occurs before or during the remote call (e.g., during JSON parsing or request validation), accessing e.cause will raise an AttributeError. The code should check if e is an instance of RayTaskError before accessing its .cause attribute, or use getattr(e, 'cause', None) to safely access it.

Fix in Cursor Fix in Web

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

changed it now.

412,
)
if isinstance(e, ValueError) and "not found" in str(e):
Expand Down
Loading