-
Notifications
You must be signed in to change notification settings - Fork 7k
add docs for post API #57698
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add docs for post API #57698
Changes from 10 commits
ec8fb2e
1c22056
63f95c7
ce8c21b
021fc91
a400c24
d41ff5f
19dd017
b497cd5
1a7dcd4
cc625d6
63c4d0d
75a506b
b69bcf6
3b220ea
3a354ab
567c922
84ef92c
2bfdde7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
# __external_scaler_config_begin__
# Serve application config with external (user-controlled) scaling enabled.
applications:
- name: my-app
  # Module path to the bound Serve application object (module:variable).
  import_path: my_module:app
  # Hand replica-count control to an external scaler instead of Serve's
  # built-in autoscaling.
  external_scaler_enabled: true
  deployments:
  - name: my-deployment
    # Starting replica count; an external scaler may change it at runtime.
    num_replicas: 1
# __external_scaler_config_end__
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| # __serve_example_begin__ | ||
| import time | ||
| from ray import serve | ||
|
|
||
|
|
||
@serve.deployment(num_replicas=3, external_scaler_enabled=True)
class TextProcessor:
    """Uppercases incoming text.

    Replica count is managed externally (``external_scaler_enabled=True``),
    so Serve's autoscaler leaves this deployment alone.
    """

    def __init__(self):
        # Per-replica running tally of requests served.
        self.request_count = 0

    def __call__(self, text: str) -> dict:
        time.sleep(0.1)  # stand-in for real processing latency
        self.request_count += 1
        return {
            "processed_text": text.upper(),
            "length": len(text),
            "request_count": self.request_count,
        }


app = TextProcessor.bind()
# __serve_example_end__
||
if __name__ == "__main__":
    import requests

    # Start the Serve application locally, then smoke-test it with one request.
    serve.run(app)

    resp = requests.post("http://localhost:8000/", json="hello world")
    print(f"Response: {resp.json()}")
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
# __client_script_begin__
import logging
import time
from datetime import datetime
import requests

# Identifiers of the Serve application/deployment this script controls.
APPLICATION_NAME = "text-processor-app"
DEPLOYMENT_NAME = "TextProcessor"
AUTH_TOKEN = "YOUR_TOKEN_HERE"  # Get from Ray dashboard at http://localhost:8265
# Base URL of the Serve REST API (also serves application traffic).
SERVE_ENDPOINT = "http://localhost:8000"
SCALING_INTERVAL = 300  # Check every 5 minutes

logger = logging.getLogger(__name__)
||
def get_current_replicas(app_name: str, deployment_name: str, token: str) -> int:
    """Look up the deployment's target replica count. Returns -1 on error.
    Response schema: https://docs.ray.io/en/latest/serve/api/doc/ray.serve.schema.ServeInstanceDetails.html
    """
    try:
        resp = requests.get(
            f"{SERVE_ENDPOINT}/api/v1/applications",
            headers={"Authorization": f"Bearer {token}"},
            timeout=10
        )
        if resp.status_code != 200:
            logger.error(f"Failed to get applications: {resp.status_code}")
            return -1

        apps = resp.json().get("applications", {})
        if app_name not in apps:
            logger.error(f"Application {app_name} not found")
            return -1

        # Find the matching deployment record, if any.
        deployments = apps[app_name].get("deployments", [])
        match = next(
            (d for d in deployments if d["name"] == deployment_name), None
        )
        if match is not None:
            return match["target_num_replicas"]

        logger.error(f"Deployment {deployment_name} not found")
        return -1
    except requests.exceptions.RequestException as e:
        logger.error(f"Request failed: {e}")
        return -1
||
def scale_deployment(app_name: str, deployment_name: str, token: str):
    """Scale the deployment to a time-of-day target.

    Peak hours (9am-5pm local time) target 10 replicas; off-peak targets 3.

    Args:
        app_name: Serve application name.
        deployment_name: Deployment within the application.
        token: Bearer token for the Serve REST API.
    """
    hour = datetime.now().hour
    current = get_current_replicas(app_name, deployment_name, token)
    # get_current_replicas returns -1 on error; without this guard the delta
    # and log message below are computed against the sentinel and the script
    # would scale blindly despite not knowing the real replica count.
    if current < 0:
        logger.error("Skipping scaling: could not determine current replica count")
        return

    target = 10 if 9 <= hour < 17 else 3  # Peak hours: 9am-5pm

    delta = target - current
    if delta == 0:
        logger.info(f"Already at target ({current} replicas)")
        return

    action = "Adding" if delta > 0 else "Removing"
    logger.info(f"{action} {abs(delta)} replicas ({current} -> {target})")

    try:
        resp = requests.post(
            f"{SERVE_ENDPOINT}/api/v1/applications/{app_name}/deployments/{deployment_name}/scale",
            headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
            json={"target_num_replicas": target},
            timeout=10
        )
        if resp.status_code == 200:
            logger.info("Successfully scaled deployment")
        else:
            logger.error(f"Scale failed: {resp.status_code} - {resp.text}")
    except requests.exceptions.RequestException as e:
        logger.error(f"Request failed: {e}")
|
||
if __name__ == "__main__":
    logger.info(f"Starting predictive scaling for {APPLICATION_NAME}/{DEPLOYMENT_NAME}")
    # Poll forever: re-evaluate the time-of-day target every SCALING_INTERVAL
    # seconds and issue a scale request when the target changes.
    while True:
        scale_deployment(APPLICATION_NAME, DEPLOYMENT_NAME, AUTH_TOKEN)
        time.sleep(SCALING_INTERVAL)
# __client_script_end__
Uh oh!
There was an error while loading. Please reload this page.