diff --git a/deploy/cloud/api-store/ai_dynamo_store/api/components.py b/deploy/cloud/api-store/ai_dynamo_store/api/components.py index 95848e7496..98fb5f4b62 100644 --- a/deploy/cloud/api-store/ai_dynamo_store/api/components.py +++ b/deploy/cloud/api-store/ai_dynamo_store/api/components.py @@ -165,7 +165,7 @@ class DynamoComponentVersionApiSchema(BaseModel): class DynamoComponentVersionManifestSchema(BaseModel): service: str - bentoml_version: Optional[str] = None + dynamo_version: Optional[str] = None apis: Dict[str, DynamoComponentVersionApiSchema] size_bytes: int @@ -179,7 +179,7 @@ def _validate_manifest(v): class DynamoComponentVersionSchema(ResourceSchema): - bento_repository_uid: str + dynamo_repository_uid: str version: str description: str image_build_status: ImageBuildStatus @@ -205,9 +205,9 @@ class DynamoComponentVersionFullSchema(DynamoComponentVersionSchema): class DynamoComponentSchema(ResourceSchema): - latest_bento: Optional[DynamoComponentVersionSchema] - latest_bentos: Optional[List[DynamoComponentVersionSchema]] - n_bentos: int + latest_dynamo: Optional[DynamoComponentVersionSchema] + latest_dynamos: Optional[List[DynamoComponentVersionSchema]] + n_dynamos: int description: str diff --git a/deploy/cloud/api-store/ai_dynamo_store/api/deployments.py b/deploy/cloud/api-store/ai_dynamo_store/api/deployments.py index f322678f97..d7202d1d62 100644 --- a/deploy/cloud/api-store/ai_dynamo_store/api/deployments.py +++ b/deploy/cloud/api-store/ai_dynamo_store/api/deployments.py @@ -85,13 +85,13 @@ async def create_deployment(deployment: CreateDeploymentSchema): kube_namespace = get_namespace() # Generate deployment name - deployment_name = sanitize_deployment_name(deployment.name, deployment.bento) + deployment_name = sanitize_deployment_name(deployment.name, deployment.dynamo) # Create the deployment using helper function created_crd = create_dynamo_deployment( name=deployment_name, namespace=kube_namespace, - dynamo_component=deployment.bento or 
deployment.component, + dynamo_component=deployment.dynamo or deployment.component, labels={ "ngc-organization": ownership["organization_id"], "ngc-user": ownership["user_id"], @@ -323,17 +323,17 @@ def update_deployment(name: str, deployment: UpdateDeploymentSchema): ownership = {"organization_id": "default-org", "user_id": "default-user"} kube_namespace = get_namespace() existing_deployment = get_deployment(name) - if existing_deployment.bento != deployment.bento: + if existing_deployment.dynamo != deployment.dynamo: raise HTTPException( status_code=422, detail="Cannot update the Dynamo components of a deployment.", ) - deployment_name = sanitize_deployment_name(name, deployment.bento) + deployment_name = sanitize_deployment_name(name, deployment.dynamo) updated_crd = update_dynamo_deployment( name=deployment_name, namespace=kube_namespace, - dynamo_nim=deployment.bento, + dynamo_nim=deployment.dynamo, labels={ "ngc-organization": ownership["organization_id"], "ngc-user": ownership["user_id"], diff --git a/deploy/cloud/api-store/ai_dynamo_store/api/dynamo.py b/deploy/cloud/api-store/ai_dynamo_store/api/dynamo.py index c6bddece7b..08264468b5 100644 --- a/deploy/cloud/api-store/ai_dynamo_store/api/dynamo.py +++ b/deploy/cloud/api-store/ai_dynamo_store/api/dynamo.py @@ -122,7 +122,7 @@ async def dynamo_component_handler( @router.get( - "/bento_repositories/{dynamo_component_name}", + "/dynamo_repositories/{dynamo_component_name}", responses={ 200: {"description": "Successful Response"}, 422: {"description": "Validation Error"}, @@ -169,16 +169,16 @@ async def get_dynamo_component( resource_type=ResourceType.DynamoComponent, labels=[], description=dynamo_component.description, - latest_bento=None + latest_dynamo=None if not latest_dynamo_component_versions else latest_dynamo_component_versions[0], - latest_bentos=latest_dynamo_component_versions, - n_bentos=len(dynamo_components), + latest_dynamos=latest_dynamo_component_versions, + n_dynamos=len(dynamo_components), 
) @router.post( - "/bento_repositories", + "/dynamo_repositories", responses={ 200: {"description": "Successful Response"}, 422: {"description": "Validation Error"}, @@ -240,14 +240,14 @@ async def create_dynamo_component( resource_type=ResourceType.DynamoComponent, labels=[], description=db_dynamo_component.description, - latest_bentos=None, - latest_bento=None, - n_bentos=0, + latest_dynamos=None, + latest_dynamo=None, + n_dynamos=0, ) @router.get( - "/bento_repositories", + "/dynamo_repositories", responses={ 200: {"description": "Successful Response"}, 422: {"description": "Validation Error"}, @@ -375,7 +375,7 @@ async def dynamo_component_version_handler( @router.get( - "/bento_repositories/{dynamo_component_name}/bentos/{version}", + "/dynamo_repositories/{dynamo_component_name}/dynamos/{version}", responses={ 200: {"description": "Successful Response"}, 422: {"description": "Validation Error"}, @@ -415,7 +415,7 @@ async def get_dynamo_component_version( @router.post( - "/bento_repositories/{dynamo_component_name}/bentos", + "/dynamo_repositories/{dynamo_component_name}/dynamos", responses={ 200: {"description": "Successful Response"}, 422: {"description": "Validation Error"}, @@ -483,7 +483,7 @@ async def create_dynamo_component_version( @router.get( - "/bento_repositories/{dynamo_component_name}/bentos", + "/dynamo_repositories/{dynamo_component_name}/dynamos", responses={ 200: {"description": "Successful Response"}, 422: {"description": "Validation Error"}, @@ -544,7 +544,7 @@ async def get_dynamo_component_versions( @router.patch( - "/bento_repositories/{dynamo_component_name}/bentos/{version}", + "/dynamo_repositories/{dynamo_component_name}/dynamos/{version}", responses={ 200: {"description": "Successful Response"}, 422: {"description": "Validation Error"}, @@ -588,7 +588,7 @@ async def update_dynamo_component_version( @router.put( - "/bento_repositories/{dynamo_component_name}/bentos/{version}/upload", + 
"/dynamo_repositories/{dynamo_component_name}/dynamos/{version}/upload", responses={ 200: {"description": "Successful Response"}, 422: {"description": "Validation Error"}, @@ -636,7 +636,7 @@ def generate_file_path(version) -> str: @router.get( - "/bento_repositories/{dynamo_component_name}/bentos/{version}/download", + "/dynamo_repositories/{dynamo_component_name}/dynamos/{version}/download", responses={ 200: {"description": "Successful Response"}, 422: {"description": "Validation Error"}, @@ -672,7 +672,7 @@ async def download_dynamo_component_version( @router.patch( - "/bento_repositories/{dynamo_component_name}/bentos/{version}/start_upload", + "/dynamo_repositories/{dynamo_component_name}/dynamos/{version}/start_upload", responses={ 200: {"description": "Successful Response"}, 422: {"description": "Validation Error"}, @@ -769,13 +769,13 @@ async def convert_dynamo_component_model_to_schema( name=entity.name, resource_type=ResourceType.DynamoComponent, labels=[], - latest_bento=( + latest_dynamo=( None if not dynamo_component_version_schemas else dynamo_component_version_schemas[0] ), - latest_bentos=dynamo_component_version_schemas, - n_bentos=total, + latest_dynamos=dynamo_component_version_schemas, + n_dynamos=total, description=entity.description, ) ) @@ -832,7 +832,7 @@ async def convert_dynamo_component_version_model_to_schema( labels=[], manifest=entity.manifest, updated_at=updated_at, - bento_repository_uid=dynamo_component.id, + dynamo_repository_uid=dynamo_component.id, # upload_started_at=upload_started_at, # upload_finished_at=upload_finished_at, transmission_strategy=TransmissionStrategy.Proxy, diff --git a/deploy/cloud/api-store/ai_dynamo_store/api/k8s.py b/deploy/cloud/api-store/ai_dynamo_store/api/k8s.py index 78753681f4..4c52219b0f 100644 --- a/deploy/cloud/api-store/ai_dynamo_store/api/k8s.py +++ b/deploy/cloud/api-store/ai_dynamo_store/api/k8s.py @@ -83,7 +83,7 @@ def create_dynamo_deployment( Args: name: Deployment name namespace: Target 
namespace - dynamo_component: Bento name and version (format: name:version) + dynamo_component: Dynamo artifact name and version (format: name:version) labels: Resource labels envs: Optional list of environment variables @@ -215,7 +215,7 @@ def update_dynamo_deployment( Args: name: Deployment name namespace: Target namespace - dynamo_nim: Bento name and version (format: name:version) + dynamo_nim: Dynamo artifact name and version (format: name:version) labels: Resource labels envs: Optional list of environment variables diff --git a/deploy/cloud/api-store/ai_dynamo_store/api/utils.py b/deploy/cloud/api-store/ai_dynamo_store/api/utils.py index b6961d7ce7..8f33190019 100644 --- a/deploy/cloud/api-store/ai_dynamo_store/api/utils.py +++ b/deploy/cloud/api-store/ai_dynamo_store/api/utils.py @@ -21,8 +21,8 @@ def get_deployment_status(resource: Dict[str, Any]) -> str: """ Get the current status of a deployment. - Maps operator status to BentoML status values. - Returns lowercase status values matching BentoML's DeploymentStatus enum. + Maps operator status to Dynamo status values. + Returns lowercase status values matching Dynamo's DeploymentStatus enum. 
""" status = resource.get("status", {}) conditions = status.get("conditions", []) @@ -43,7 +43,7 @@ def get_deployment_status(resource: Dict[str, Any]) -> str: if state == "failed": return "failed" elif state == "pending": - return "deploying" # map pending to deploying to match BentoML states + return "deploying" # map pending to deploying to match Dynamo states # Default fallback return "unknown" @@ -72,11 +72,11 @@ def build_latest_revision_from_cr(cr: dict) -> dict: spec = cr.get("spec", {}) meta = cr.get("metadata", {}) now = datetime.utcnow().isoformat() + "Z" - bento_str = spec.get("dynamoGraph", "unknown:unknown") - if ":" in bento_str: - bento_name, bento_version = bento_str.split(":", 1) + dynamo_str = spec.get("dynamoGraph", "unknown:unknown") + if ":" in dynamo_str: + dynamo_name, dynamo_version = dynamo_str.split(":", 1) else: - bento_name, bento_version = "unknown", "unknown" + dynamo_name, dynamo_version = "unknown", "unknown" # Dummy creator creator = {"name": "system", "email": "", "first_name": "", "last_name": ""} # Dummy repository @@ -85,24 +85,24 @@ def build_latest_revision_from_cr(cr: dict) -> dict: "created_at": now, "updated_at": now, "deleted_at": None, - "name": bento_name, - "resource_type": "bento_repository", + "name": dynamo_name, + "resource_type": "dynamo_repository", "labels": [], "description": "", - "latest_bento": None, + "latest_dynamo": None, } - # Dummy bento - bento = { - "uid": "dummy-bento-uid", + # Dummy dynamo + dynamo = { + "uid": "dummy-dynamo-uid", "created_at": now, "updated_at": now, "deleted_at": None, - "name": bento_version, - "resource_type": "bento", + "name": dynamo_version, + "resource_type": "dynamo", "labels": [], "description": "", "repository": repository, - "version": bento_version, + "version": dynamo_version, "image_build_status": "", "upload_status": "", "upload_finished_reason": "", @@ -125,7 +125,7 @@ def build_latest_revision_from_cr(cr: dict) -> dict: "access_authorization": True, "envs": 
spec.get("envs", []), }, - "bento": bento, + "dynamo": dynamo, } # Revision return { diff --git a/deploy/cloud/api-store/ai_dynamo_store/models/schemas.py b/deploy/cloud/api-store/ai_dynamo_store/models/schemas.py index d71d5b0c20..5859f8c7bc 100644 --- a/deploy/cloud/api-store/ai_dynamo_store/models/schemas.py +++ b/deploy/cloud/api-store/ai_dynamo_store/models/schemas.py @@ -56,7 +56,7 @@ class DeploymentConfigSchema(BaseModel): class UpdateDeploymentSchema(DeploymentConfigSchema): - bento: Optional[str] = None + dynamo: Optional[str] = None component: Optional[str] = None diff --git a/deploy/cloud/api-store/ai_dynamo_store/tests/test_utils.py b/deploy/cloud/api-store/ai_dynamo_store/tests/test_utils.py index d78bf6831e..6bbbcdfd39 100644 --- a/deploy/cloud/api-store/ai_dynamo_store/tests/test_utils.py +++ b/deploy/cloud/api-store/ai_dynamo_store/tests/test_utils.py @@ -76,8 +76,8 @@ def test_build_latest_revision_from_cr_minimal(): rev = build_latest_revision_from_cr(cr) assert rev["uid"] == "u1" assert rev["name"] == "n1" - assert rev["targets"][0]["bento"]["repository"]["name"] == "repo" - assert rev["targets"][0]["bento"]["name"] == "ver" + assert rev["targets"][0]["dynamo"]["repository"]["name"] == "repo" + assert rev["targets"][0]["dynamo"]["name"] == "ver" assert rev["targets"][0]["config"]["services"] == {"svc": {}} assert rev["targets"][0]["config"]["envs"] == [{"name": "A", "value": "B"}] @@ -87,14 +87,14 @@ def test_build_latest_revision_from_cr_missing_fields(): rev = build_latest_revision_from_cr(cr) assert rev["uid"] == "dummy-uid" assert rev["name"] == "dummy-revision" - assert rev["targets"][0]["bento"]["repository"]["name"] == "unknown" - assert rev["targets"][0]["bento"]["name"] == "unknown" + assert rev["targets"][0]["dynamo"]["repository"]["name"] == "unknown" + assert rev["targets"][0]["dynamo"]["name"] == "unknown" assert rev["targets"][0]["config"]["services"] == {} assert rev["targets"][0]["config"]["envs"] == [] -def 
test_build_latest_revision_from_cr_bento_colonless(): +def test_build_latest_revision_from_cr_dynamo_colonless(): cr = {"spec": {"dynamoGraph": "justrepo"}} rev = build_latest_revision_from_cr(cr) - assert rev["targets"][0]["bento"]["repository"]["name"] == "unknown" - assert rev["targets"][0]["bento"]["name"] == "unknown" + assert rev["targets"][0]["dynamo"]["repository"]["name"] == "unknown" + assert rev["targets"][0]["dynamo"]["name"] == "unknown" diff --git a/deploy/helm/deploy.sh b/deploy/helm/deploy.sh index 9a1793f200..93d7d73ccd 100755 --- a/deploy/helm/deploy.sh +++ b/deploy/helm/deploy.sh @@ -51,11 +51,11 @@ docker login "$DOCKER_REGISTRY" # Change to the specified directory cd "$DYNAMO_DIRECTORY" -# Build the Bento container -echo "Building Bento image for $DYNAMO_IDENTIFIER..." +# Build the Dynamo application container +echo "Building Dynamo application image for $DYNAMO_IDENTIFIER..." DOCKER_DEFAULT_PLATFORM=linux/amd64 uv run dynamo build --containerize $DYNAMO_IDENTIFIER -# Extract the module and the bento name +# Extract the module and the dynamo name DYNAMO_MODULE=$(echo "$DYNAMO_IDENTIFIER" | awk -F':' '{print $1}' | tr '[:upper:]' '[:lower:]') DYNAMO_NAME=$(echo "$DYNAMO_IDENTIFIER" | awk -F':' '{print $2}' | tr '[:upper:]' '[:lower:]') diff --git a/deploy/sdk/src/dynamo/sdk/__init__.py b/deploy/sdk/src/dynamo/sdk/__init__.py index 6a8a2621fc..de86aae3e5 100644 --- a/deploy/sdk/src/dynamo/sdk/__init__.py +++ b/deploy/sdk/src/dynamo/sdk/__init__.py @@ -16,8 +16,6 @@ import warnings from typing import Any -# Suppress warning from setuptools caused by bentoml -# TODO: Remove this line after the bentoml import is removed from this file warnings.filterwarnings("ignore", category=UserWarning, message=".*pkg_resources.*") # flake8: noqa: E402 diff --git a/deploy/sdk/src/dynamo/sdk/cli/cli.py b/deploy/sdk/src/dynamo/sdk/cli/cli.py index 80c3902bcc..12e27d2172 100644 --- a/deploy/sdk/src/dynamo/sdk/cli/cli.py +++ b/deploy/sdk/src/dynamo/sdk/cli/cli.py @@ 
-61,7 +61,6 @@ def main( ): """ The Dynamo CLI is a CLI for serving, containerizing, and deploying Dynamo applications. - It takes inspiration from and leverages core pieces of the BentoML deployment stack. At a high level, you use `serve` to run a set of dynamo services locally, `build` and `containerize` to package them up for deployment, and then `cloud` diff --git a/deploy/sdk/src/dynamo/sdk/cli/serve.py b/deploy/sdk/src/dynamo/sdk/cli/serve.py index 68d498ec4b..c5c795ff4d 100644 --- a/deploy/sdk/src/dynamo/sdk/cli/serve.py +++ b/deploy/sdk/src/dynamo/sdk/cli/serve.py @@ -109,7 +109,7 @@ def serve( target: TargetEnum = typer.Option( TargetEnum.DYNAMO, "--target", - help="Specify the target: 'dynamo' or 'bento'.", + help="Specify the target: 'dynamo'", case_sensitive=False, ), ): diff --git a/deploy/sdk/src/dynamo/sdk/cli/serve_dynamo.py b/deploy/sdk/src/dynamo/sdk/cli/serve_dynamo.py index 00f9e0e850..44cbbd7853 100644 --- a/deploy/sdk/src/dynamo/sdk/cli/serve_dynamo.py +++ b/deploy/sdk/src/dynamo/sdk/cli/serve_dynamo.py @@ -94,12 +94,12 @@ def add_fastapi_routes(app, service, class_instance): @app.command() def main( - bento_identifier: str = typer.Argument(".", help="The bento identifier"), + dynamo_identifier: str = typer.Argument(".", help="The dynamo identifier"), service_name: str = typer.Option("", help="Service name"), runner_map: str = typer.Option( None, - envvar="BENTOML_RUNNER_MAP", - help="JSON string of runners map, default sets to envars `BENTOML_RUNNER_MAP`", + envvar="DYNAMO_RUNNER_MAP", + help="JSON string of runners map, default sets to envars `DYNAMO_RUNNER_MAP`", ), worker_env: str = typer.Option(None, help="Environment variables"), worker_id: int = typer.Option( @@ -112,7 +112,7 @@ def main( ), target: str = typer.Option( "dynamo", - help="Specify the target: 'dynamo' or 'bento'.", + help="Specify the target: 'dynamo' or 'dynamo'.", ), ) -> None: """Start a worker for the given service - either Dynamo or regular service""" @@ -137,7 +137,7 
@@ def main( f"the maximum worker ID is {len(env_list)}" ) os.environ.update(env_list[worker_key]) - service = find_and_load_service(bento_identifier) + service = find_and_load_service(dynamo_identifier) if service_name and service_name != service.name: service = service.find_dependent_by_name(service_name) diff --git a/deploy/sdk/src/dynamo/sdk/cli/serving.py b/deploy/sdk/src/dynamo/sdk/cli/serving.py index 86bb7f67bb..21b0114b3d 100644 --- a/deploy/sdk/src/dynamo/sdk/cli/serving.py +++ b/deploy/sdk/src/dynamo/sdk/cli/serving.py @@ -47,12 +47,12 @@ def _get_dynamo_worker_script( - bento_identifier: str, svc_name: str, target: TargetEnum + dynamo_identifier: str, svc_name: str, target: TargetEnum ) -> list[str]: args = [ "-m", _DYNAMO_WORKER_SCRIPT, - bento_identifier, + dynamo_identifier, "--service-name", svc_name, "--worker-id", @@ -64,7 +64,7 @@ def _get_dynamo_worker_script( def create_dynamo_watcher( - bento_identifier: str, + dynamo_identifier: str, svc: ServiceProtocol, uds_path: str, scheduler: ResourceAllocator, @@ -77,7 +77,7 @@ def create_dynamo_watcher( num_workers, resource_envs = scheduler.get_resource_envs(svc) uri, socket = _get_server_socket(svc, uds_path) - args = _get_dynamo_worker_script(bento_identifier, svc.name, target) + args = _get_dynamo_worker_script(dynamo_identifier, svc.name, target) if resource_envs: args.extend(["--worker-env", json.dumps(resource_envs)]) @@ -265,7 +265,7 @@ def serve_dynamo_graph( # these resource_envs are passed to each individual worker's environment which is set in serve_dynamo if resource_envs: dynamo_args.extend(["--worker-env", json.dumps(resource_envs)]) - # env is the base bentoml environment variables. We make a copy and update it to add any service configurations and additional env vars + # env is the base dynamo environment variables. 
We make a copy and update it to add any service configurations and additional env vars worker_env = env.copy() if env else {} # Pass through the main service config @@ -296,7 +296,7 @@ def serve_dynamo_graph( ) # inject runner map now - inject_env = {"BENTOML_RUNNER_MAP": json.dumps(dependency_map)} + inject_env = {"DYNAMO_RUNNER_MAP": json.dumps(dependency_map)} for watcher in watchers: if watcher.env is None: diff --git a/deploy/sdk/src/dynamo/sdk/cli/utils.py b/deploy/sdk/src/dynamo/sdk/cli/utils.py index 6b3d9eceab..c923f1aa95 100644 --- a/deploy/sdk/src/dynamo/sdk/cli/utils.py +++ b/deploy/sdk/src/dynamo/sdk/cli/utils.py @@ -50,7 +50,7 @@ class ServiceProtocol(Protocol): name: str inner: Any models: list[Any] - bento: Any + dynamo: Any def is_dynamo_component(self) -> bool: ... diff --git a/deploy/sdk/src/dynamo/sdk/core/runner/__init__.py b/deploy/sdk/src/dynamo/sdk/core/runner/__init__.py index 6df82caa5e..dbd02e9822 100644 --- a/deploy/sdk/src/dynamo/sdk/core/runner/__init__.py +++ b/deploy/sdk/src/dynamo/sdk/core/runner/__init__.py @@ -21,4 +21,3 @@ class TargetEnum(str, Enum): """The target deployment environment for the service""" DYNAMO = "dynamo" - BENTO = "bento" diff --git a/deploy/sdk/src/dynamo/sdk/lib/loader.py b/deploy/sdk/src/dynamo/sdk/lib/loader.py index 42213a2317..09264264f6 100644 --- a/deploy/sdk/src/dynamo/sdk/lib/loader.py +++ b/deploy/sdk/src/dynamo/sdk/lib/loader.py @@ -223,7 +223,7 @@ def load_entry_service( config_path = os.path.join(graph_dir, "dynamo.yaml") if not os.path.isfile(config_path): raise FileNotFoundError( - f"Pipeline config (bento.yaml) not found in {graph_dir}" + f"Pipeline config (dynamo.yaml) not found in {graph_dir}" ) with open(config_path, encoding="utf-8") as f: graph_cfg = yaml.safe_load(f) @@ -233,7 +233,7 @@ def load_entry_service( if src_dir not in sys.path: sys.path.insert(0, src_dir) - # Compute size_bytes as the total size of the bento directory + # Compute size_bytes as the total size of the dynamo 
directory size_bytes = _get_dir_size(graph_dir) service_name = graph_cfg.get("service") diff --git a/docs/API/sdk.md b/docs/API/sdk.md index 364fc952af..5b90ba49ef 100644 --- a/docs/API/sdk.md +++ b/docs/API/sdk.md @@ -29,7 +29,7 @@ limitations under the License. Dynamo is a flexible and performant distributed inferencing solution for large-scale deployments. It is an ecosystem of tools, frameworks, and abstractions that makes the design, customization, and deployment of frontier-level models onto datacenter-scale infrastructure easy to reason about and optimized for your specific inferencing workloads. Dynamo's core is written in Rust and contains a set of well-defined Python bindings. See Python Bindings](./python_bindings.md). -Dynamo SDK is a layer on top of the core. It is a Python framework that makes it easy to create inference graphs and deploy them locally and onto a target K8s cluster. The SDK was heavily inspired by [BentoML's](https://github.com/bentoml/BentoML) open source deployment patterns and leverages many of its core primitives. The Dynamo CLI is a companion tool that allows you to spin up an inference pipeline locally, containerize it, and deploy it. You can find a toy hello-world example and instructions for deploying it [here](../examples/hello_world.md). +Dynamo SDK is a layer on top of the core. It is a Python framework that makes it easy to create inference graphs and deploy them locally and onto a target K8s cluster. The SDK was heavily inspired by [BentoML's](https://github.com/bentoml/BentoML) open source deployment patterns. The Dynamo CLI is a companion tool that allows you to spin up an inference pipeline locally, containerize it, and deploy it. You can find a toy hello-world example and instructions for deploying it [here](../examples/hello_world.md). 
## Installation @@ -112,7 +112,7 @@ Dynamo follows a class-based architecture similar to BentoML making it intuitive This approach provides a clean separation of concerns and makes the service structure easy to understand. #### Service Dependencies with `depends()` -The `depends()` function is a powerful BentoML feature that lets you create a dependency between services. When you use `depends(ServiceB)`, several things happen: +The `depends()` function is a powerful feature that lets you create a dependency between services. When you use `depends(ServiceB)`, several things happen: 1. It ensures that `ServiceB` is deployed when `ServiceA` is deployed by adding it to an internal service dependency graph 2. It creates a client to the endpoints of `ServiceB` that is being served under the hood. 3. You are able to access `ServiceB` endpoints as if it were a local function! @@ -149,10 +149,8 @@ self.worker_client = ( This is used in some of our prebuilt examples and is a powerful way to leverage the benefits of the SDK while being able to access Dynamo's core primitives. -You can find more docs on depends [here](https://docs.bentoml.com/en/latest/build-with-bentoml/distributed-services.html#interservice-communication) - #### Lifecycle Hooks -Dynamo supports key lifecycle hooks to manage service initialization and cleanup. We currently only support a subset of BentoML's lifecycle hooks but are working on adding support for the rest. +Dynamo supports key lifecycle hooks to manage service initialization and cleanup. ##### `@async_on_start` diff --git a/docs/guides/cli_overview.md b/docs/guides/cli_overview.md index 63d9c02697..be797091f2 100644 --- a/docs/guides/cli_overview.md +++ b/docs/guides/cli_overview.md @@ -18,7 +18,7 @@ limitations under the License. # About the Dynamo Command Line Interface -The Dynamo CLI serves, containerizes, and deploys Dynamo applications efficiently. 
It leverages core pieces of the BentoML deployment stack and provides intuitive commands to manage your Dynamo services. +The Dynamo CLI serves, containerizes, and deploys Dynamo applications efficiently. It provides intuitive commands to manage your Dynamo services. ## CLI Capabilities @@ -26,7 +26,7 @@ With the Dynamo CLI, you can: * Chat with models quickly using `run` * Serve multiple services locally using `serve` -* Package your services into archives (called `bentos`) using `build` +* Package your services into archives (called `dynamo artifacts`) using `build` * Deploy pipelines to Dynamo Cloud using `deploy` ## Commands @@ -83,7 +83,7 @@ dynamo build [SERVICE] **Flags** * `--working-dir`: Specify the directory for finding the Service instance -* `--containerize`: Choose whether to create a container from the Bento after building +* `--containerize`: Choose whether to create a container from the dynamo artifact after building **Example** ```bash diff --git a/examples/llm/configs/disagg.yaml b/examples/llm/configs/disagg.yaml index e746143316..77f405a9f9 100644 --- a/examples/llm/configs/disagg.yaml +++ b/examples/llm/configs/disagg.yaml @@ -26,7 +26,6 @@ Frontend: Processor: router: round-robin common-configs: [model, block-size] - prompt-template: "USER: \n ASSISTANT:" VllmWorker: remote-prefill: true diff --git a/lib/bindings/python/src/dynamo/runtime/logging.py b/lib/bindings/python/src/dynamo/runtime/logging.py index 774af3e06f..2d01e98ead 100644 --- a/lib/bindings/python/src/dynamo/runtime/logging.py +++ b/lib/bindings/python/src/dynamo/runtime/logging.py @@ -97,7 +97,7 @@ def configure_dynamo_logging( configure_sglang_logging(dyn_level) # loggers that should be configured to ERROR - error_loggers = ["bentoml", "tag"] + error_loggers = ["tag"] for logger_name in error_loggers: logger = logging.getLogger(logger_name) logger.handlers = []