From 776e0060b8c704fb0e84db0f67a9c16ca9c2380f Mon Sep 17 00:00:00 2001
From: Jonathan Makunga
Date: Fri, 17 May 2024 13:35:13 -0700
Subject: [PATCH 01/17] Add TEI Serving

---
 src/sagemaker/serve/builder/tei_builder.py    |   8 +-
 .../serve/mode/local_container_mode.py        |   2 +
 .../serve/model_server/tei/__init__.py        |   0
 .../serve/model_server/tei/prepare.py         |   0
 .../serve/model_server/tei/server.py          | 160 ++++++++++++++++++
 src/sagemaker/serve/model_server/tei/utils.py |   1 +
 src/sagemaker/serve/utils/types.py            |   1 +
 7 files changed, 168 insertions(+), 4 deletions(-)
 create mode 100644 src/sagemaker/serve/model_server/tei/__init__.py
 create mode 100644 src/sagemaker/serve/model_server/tei/prepare.py
 create mode 100644 src/sagemaker/serve/model_server/tei/server.py
 create mode 100644 src/sagemaker/serve/model_server/tei/utils.py

diff --git a/src/sagemaker/serve/builder/tei_builder.py b/src/sagemaker/serve/builder/tei_builder.py
index 50d3866468..8c09cf6467 100644
--- a/src/sagemaker/serve/builder/tei_builder.py
+++ b/src/sagemaker/serve/builder/tei_builder.py
@@ -74,16 +74,16 @@ def _prepare_for_mode(self):
     def _get_client_translators(self):
         """Placeholder docstring"""
 
-    def _set_to_tgi(self):
+    def _set_to_tei(self):
         """Placeholder docstring"""
-        if self.model_server != ModelServer.TGI:
+        if self.model_server != ModelServer.TEI:
             messaging = (
                 "HuggingFace Model ID support on model server: "
                 f"{self.model_server} is not currently supported. "
-                f"Defaulting to {ModelServer.TGI}"
+                f"Defaulting to {ModelServer.TEI}"
             )
             logger.warning(messaging)
-            self.model_server = ModelServer.TGI
+            self.model_server = ModelServer.TEI
 
     def _create_tei_model(self, **kwargs) -> Type[Model]:
         """Placeholder docstring"""
diff --git a/src/sagemaker/serve/mode/local_container_mode.py b/src/sagemaker/serve/mode/local_container_mode.py
index f940e2959c..d145a1de28 100644
--- a/src/sagemaker/serve/mode/local_container_mode.py
+++ b/src/sagemaker/serve/mode/local_container_mode.py
@@ -21,6 +21,7 @@
 from sagemaker.serve.model_server.djl_serving.server import LocalDJLServing
 from sagemaker.serve.model_server.triton.server import LocalTritonServer
 from sagemaker.serve.model_server.tgi.server import LocalTgiServing
+from sagemaker.serve.model_server.tei.server import LocalTeiServing
 from sagemaker.serve.model_server.multi_model_server.server import LocalMultiModelServer
 from sagemaker.session import Session
 
@@ -41,6 +42,7 @@ class LocalContainerMode(
     LocalTgiServing,
     LocalMultiModelServer,
     LocalTensorflowServing,
+    LocalTeiServing,
 ):
     """A class that holds methods to deploy model to a container in local environment"""
 
diff --git a/src/sagemaker/serve/model_server/tei/__init__.py b/src/sagemaker/serve/model_server/tei/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/sagemaker/serve/model_server/tei/prepare.py b/src/sagemaker/serve/model_server/tei/prepare.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/sagemaker/serve/model_server/tei/server.py b/src/sagemaker/serve/model_server/tei/server.py
new file mode 100644
index 0000000000..6579f1483c
--- /dev/null
+++ b/src/sagemaker/serve/model_server/tei/server.py
@@ -0,0 +1,160 @@
+"""Module for Local TEI Serving"""
+
+from __future__ import absolute_import
+
+import requests
+import logging
+from pathlib import Path
+from docker.types import DeviceRequest
+from sagemaker import Session, fw_utils
+from sagemaker.serve.utils.exceptions import LocalModelInvocationException
+from sagemaker.base_predictor import PredictorBase
+from sagemaker.s3_utils import determine_bucket_and_prefix, parse_s3_url, s3_path_join
+from sagemaker.s3 import S3Uploader
+from sagemaker.local.utils import get_docker_host
+
+
+MODE_DIR_BINDING = "/opt/ml/model/"
+_SHM_SIZE = "2G"
+_DEFAULT_ENV_VARS = {
+    "TRANSFORMERS_CACHE": "/opt/ml/model/",
+    "HUGGINGFACE_HUB_CACHE": "/opt/ml/model/",
+}
+
+logger = logging.getLogger(__name__)
+
+
+class LocalTeiServing:
+    """LocalTeiServing class"""
+
+    def _start_tei_serving(
+        self, client: object, image: str, model_path: str, secret_key: str, env_vars: dict
+    ):
+        """Starts a local tei serving container.
+
+        Args:
+            client: Docker client
+            image: Image to use
+            model_path: Path to the model
+            secret_key: Secret key to use for authentication
+            env_vars: Environment variables to set
+        """
+        if env_vars and secret_key:
+            env_vars['SAGEMAKER_SERVE_SECRET_KEY'] = secret_key
+
+        self.container = client.containers.run(
+            image,
+            shm_size=_SHM_SIZE,
+            device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
+            network_mode="host",
+            detach=True,
+            auto_remove=True,
+            volumes={
+                Path(model_path).joinpath("code"): {
+                    "bind": MODE_DIR_BINDING,
+                    "mode": "rw",
+                },
+            },
+            environment=_update_env_vars(env_vars),
+        )
+
+    def _invoke_tei_serving(self, request: object, content_type: str, accept: str):
+        """Invokes a local tei serving container.
+
+        Args:
+            request: Request to send
+            content_type: Content type to use
+            accept: Accept to use
+        """
+        try:
+            response = requests.post(
+                f"http://{get_docker_host()}:8080/invocations",
+                data=request,
+                headers={"Content-Type": content_type, "Accept": accept},
+                timeout=600,
+            )
+            response.raise_for_status()
+            return response.content
+        except Exception as e:
+            raise Exception("Unable to send request to the local container server") from e
+
+    def _tei_deep_ping(self, predictor: PredictorBase):
+        """Checks if the local tei serving container is up and running.
+
+        If the container is not up and running, it will raise an exception.
+        """
+        response = None
+        try:
+            response = predictor.predict(self.schema_builder.sample_input)
+            return (True, response)
+        # pylint: disable=broad-except
+        except Exception as e:
+            if "422 Client Error: Unprocessable Entity for url" in str(e):
+                raise LocalModelInvocationException(str(e))
+            return (False, response)
+
+        return (True, response)
+
+
+class SageMakerTeiServing:
+    """SageMakerTeiServing class"""
+
+    def _upload_tei_artifacts(
+        self,
+        model_path: str,
+        sagemaker_session: Session,
+        s3_model_data_url: str = None,
+        image: str = None,
+        env_vars: dict = None,
+    ):
+        """Uploads the model artifacts to S3.
+
+        Args:
+            model_path: Path to the model
+            sagemaker_session: SageMaker session
+            s3_model_data_url: S3 model data URL
+            image: Image to use
+            env_vars: Environment variables to set
+        """
+        if s3_model_data_url:
+            bucket, key_prefix = parse_s3_url(url=s3_model_data_url)
+        else:
+            bucket, key_prefix = None, None
+
+        code_key_prefix = fw_utils.model_code_key_prefix(key_prefix, None, image)
+
+        bucket, code_key_prefix = determine_bucket_and_prefix(
+            bucket=bucket, key_prefix=code_key_prefix, sagemaker_session=sagemaker_session
+        )
+
+        code_dir = Path(model_path).joinpath("code")
+
+        s3_location = s3_path_join("s3://", bucket, code_key_prefix, "code")
+
+        logger.debug("Uploading TGI Model Resources uncompressed to: %s", s3_location)
+
+        model_data_url = S3Uploader.upload(
+            str(code_dir),
+            s3_location,
+            None,
+            sagemaker_session,
+        )
+
+        model_data = {
+            "S3DataSource": {
+                "CompressionType": "None",
+                "S3DataType": "S3Prefix",
+                "S3Uri": model_data_url + "/",
+            }
+        }
+
+        return (model_data, _update_env_vars(env_vars))
+
+
+def _update_env_vars(env_vars: dict) -> dict:
+    """Placeholder docstring"""
+    updated_env_vars = {}
+    updated_env_vars.update(_DEFAULT_ENV_VARS)
+    if env_vars:
+        updated_env_vars.update(env_vars)
+    return updated_env_vars
diff --git a/src/sagemaker/serve/model_server/tei/utils.py b/src/sagemaker/serve/model_server/tei/utils.py
new file mode 100644
index 0000000000..01e3b29f6c
--- /dev/null
+++ b/src/sagemaker/serve/model_server/tei/utils.py
@@ -0,0 +1 @@
+"""TEI ModelBuilder Utils"""
\ No newline at end of file
diff --git a/src/sagemaker/serve/utils/types.py b/src/sagemaker/serve/utils/types.py
index 661093f249..3ac80aa7ea 100644
--- a/src/sagemaker/serve/utils/types.py
+++ b/src/sagemaker/serve/utils/types.py
@@ -18,6 +18,7 @@ def __str__(self):
     DJL_SERVING = 4
     TRITON = 5
     TGI = 6
+    TEI = 7
 
 
 class _DjlEngine(Enum):

From 388ea2a789eb87d576f08a7f288cb604cca4386d Mon Sep 17 00:00:00 2001
From: Jonathan Makunga
Date: Fri, 17 May 2024 13:49:56 -0700
Subject: [PATCH 02/17] Add TEI Serving

---
 src/sagemaker/serve/builder/tei_builder.py |  4 +-
 .../serve/mode/local_container_mode.py     |  9 ++++
 src/sagemaker/serve/utils/predictors.py    | 43 +++++++++++++++++++
 3 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/src/sagemaker/serve/builder/tei_builder.py b/src/sagemaker/serve/builder/tei_builder.py
index 8c09cf6467..75995bbc94 100644
--- a/src/sagemaker/serve/builder/tei_builder.py
+++ b/src/sagemaker/serve/builder/tei_builder.py
@@ -25,7 +25,7 @@
     _get_nb_instance,
 )
 from sagemaker.serve.model_server.tgi.prepare import _create_dir_structure
-from sagemaker.serve.utils.predictors import TgiLocalModePredictor
+from sagemaker.serve.utils.predictors import TeiLocalModePredictor
 from sagemaker.serve.utils.types import ModelServer
 from sagemaker.serve.mode.function_pointers import Mode
 from sagemaker.serve.utils.telemetry_logger import _capture_telemetry
@@ -142,7 +142,7 @@ def _tei_model_builder_deploy_wrapper(self, *args, **kwargs) -> Type[PredictorBa
 
         if self.mode == Mode.LOCAL_CONTAINER:
             timeout = kwargs.get("model_data_download_timeout")
-            predictor = TgiLocalModePredictor(
+            predictor = TeiLocalModePredictor(
                 self.modes[str(Mode.LOCAL_CONTAINER)], serializer, deserializer
             )
 
diff --git a/src/sagemaker/serve/mode/local_container_mode.py b/src/sagemaker/serve/mode/local_container_mode.py
index d145a1de28..8e1f48e593 100644
--- a/src/sagemaker/serve/mode/local_container_mode.py
+++ b/src/sagemaker/serve/mode/local_container_mode.py
@@ -158,6 +158,15 @@ def create_server(
                 env_vars=env_vars if env_vars else self.env_vars,
             )
             self._ping_container = self._tensorflow_serving_deep_ping
+        elif self.model_server == ModelServer.TEI:
+            self._start_tei_serving(
+                client=self.client,
+                image=image,
+                model_path=model_path if model_path else self.model_path,
+                secret_key=secret_key,
+                env_vars=env_vars if env_vars else self.env_vars,
+            )
+            self._ping_container = self._tei_deep_ping
 
         # allow some time for container to be ready
         time.sleep(10)
diff --git a/src/sagemaker/serve/utils/predictors.py b/src/sagemaker/serve/utils/predictors.py
index 866167c2c6..e79d4a8fd6 100644
--- a/src/sagemaker/serve/utils/predictors.py
+++ b/src/sagemaker/serve/utils/predictors.py
@@ -209,6 +209,49 @@ def delete_predictor(self):
         self._mode_obj.destroy_server()
 
 
+class TeiLocalModePredictor(PredictorBase):
+    """Lightweight Tei predictor for local deployment in IN_PROCESS and LOCAL_CONTAINER modes"""
+
+    def __init__(
+        self,
+        mode_obj: Type[LocalContainerMode],
+        serializer=JSONSerializer(),
+        deserializer=JSONDeserializer(),
+    ):
+        self._mode_obj = mode_obj
+        self.serializer = serializer
+        self.deserializer = deserializer
+
+    def predict(self, data):
+        """Placeholder docstring"""
+        return [
+            self.deserializer.deserialize(
+                io.BytesIO(
+                    self._mode_obj._invoke_tei_serving(
+                        self.serializer.serialize(data),
+                        self.content_type,
+                        self.deserializer.ACCEPT[0],
+                    )
+                ),
+                self.content_type,
+            )
+        ]
+
+    @property
+    def content_type(self):
+        """The MIME type of the data sent to the inference endpoint."""
+        return self.serializer.CONTENT_TYPE
+
+    @property
+    def accept(self):
+        """The content type(s) that are expected from the inference endpoint."""
+        return self.deserializer.ACCEPT
+
+    def delete_predictor(self):
+        """Shut down and remove the container that you created in LOCAL_CONTAINER mode"""
+        self._mode_obj.destroy_server()
+
+
 class TensorflowServingLocalPredictor(PredictorBase):
     """Lightweight predictor for local deployment in LOCAL_CONTAINER modes"""
 

From 7ee2e44839bd7c0e544b8d4692b91811954df4b9 Mon Sep 17 00:00:00 2001
From: Jonathan Makunga
Date: Fri, 17 May 2024 14:05:34 -0700
Subject: [PATCH 03/17] Add TEI Serving

---
 src/sagemaker/serve/builder/tei_builder.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sagemaker/serve/builder/tei_builder.py b/src/sagemaker/serve/builder/tei_builder.py
index 75995bbc94..f187cd0332 100644
--- a/src/sagemaker/serve/builder/tei_builder.py
+++ b/src/sagemaker/serve/builder/tei_builder.py
@@ -216,7 +216,7 @@ def _build_for_tei(self):
         """Placeholder docstring"""
         self.secret_key = None
 
-        self._set_to_tgi()
+        self._set_to_tei()
 
         self.pysdk_model = self._build_for_hf_tei()
         return self.pysdk_model

From d9d4f1360bcd996bf72357834c9e938b5a7dbf97 Mon Sep 17 00:00:00 2001
From: Jonathan Makunga
Date: Fri, 17 May 2024 14:12:44 -0700
Subject: [PATCH 04/17] Add TEI Serving

---
 src/sagemaker/serve/utils/telemetry_logger.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/sagemaker/serve/utils/telemetry_logger.py b/src/sagemaker/serve/utils/telemetry_logger.py
index 8983a4b5c9..6eb49c8d67 100644
--- a/src/sagemaker/serve/utils/telemetry_logger.py
+++ b/src/sagemaker/serve/utils/telemetry_logger.py
@@ -52,12 +52,8 @@
 }
 
 MODEL_SERVER_TO_CODE = {
-    str(ModelServer.TORCHSERVE): 1,
-    str(ModelServer.MMS): 2,
-    str(ModelServer.TENSORFLOW_SERVING): 3,
-    str(ModelServer.DJL_SERVING): 4,
-    str(ModelServer.TRITON): 5,
-    str(ModelServer.TGI): 6,
+    str(model_server): model_server.value
+    for model_server in ModelServer
 }
 
 MLFLOW_MODEL_PATH_CODE = {

From 0f36bb2bd51c752b704f12d4c60b2f95f05f5a87 Mon Sep 17 00:00:00 2001
From: Jonathan Makunga
Date: Fri, 17 May 2024 14:14:45 -0700
Subject: [PATCH 05/17] Add TEI Serving

---
 src/sagemaker/serve/utils/telemetry_logger.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/sagemaker/serve/utils/telemetry_logger.py b/src/sagemaker/serve/utils/telemetry_logger.py
index 6eb49c8d67..99aeb4ff26 100644
--- a/src/sagemaker/serve/utils/telemetry_logger.py
+++ b/src/sagemaker/serve/utils/telemetry_logger.py
@@ -52,8 +52,13 @@
 }
 
 MODEL_SERVER_TO_CODE = {
-    str(model_server): model_server.value
-    for model_server in ModelServer
+    str(ModelServer.TORCHSERVE): 1,
+    str(ModelServer.MMS): 2,
+    str(ModelServer.TENSORFLOW_SERVING): 3,
+    str(ModelServer.DJL_SERVING): 4,
+    str(ModelServer.TRITON): 5,
+    str(ModelServer.TGI): 6,
+    str(ModelServer.TEI): 7,
 }
 
 MLFLOW_MODEL_PATH_CODE = {

From aaeee36ec7f2fa108590afb98ccefe3910f78851 Mon Sep 17 00:00:00 2001
From: Jonathan Makunga
Date: Fri, 17 May 2024 14:21:09 -0700
Subject: [PATCH 06/17] Add TEI Serving

---
 src/sagemaker/serve/mode/local_container_mode.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/sagemaker/serve/mode/local_container_mode.py b/src/sagemaker/serve/mode/local_container_mode.py
index 8e1f48e593..2a86e6d7de 100644
--- a/src/sagemaker/serve/mode/local_container_mode.py
+++ b/src/sagemaker/serve/mode/local_container_mode.py
@@ -226,6 +226,7 @@ def _pull_image(self, image: str):
                 logger.warning("Unable to login to ecr: %s", e)
 
         self.client = docker.from_env()
+        print(self.client.images.list())
         try:
             logger.info("Pulling image %s from repository...", image)
             self.client.images.pull(image)

From b33a93dae95a92d6ec7fc3eb56ad7b98b6a8deb5 Mon Sep 17 00:00:00 2001
From: Jonathan Makunga
Date: Fri, 17 May 2024 14:38:06 -0700
Subject: [PATCH 07/17] Notebook testing

---
 src/sagemaker/serve/mode/local_container_mode.py    |  1 -
 src/sagemaker/serve/mode/sagemaker_endpoint_mode.py | 10 ++++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/sagemaker/serve/mode/local_container_mode.py b/src/sagemaker/serve/mode/local_container_mode.py
index 2a86e6d7de..8e1f48e593 100644
--- a/src/sagemaker/serve/mode/local_container_mode.py
+++ b/src/sagemaker/serve/mode/local_container_mode.py
@@ -226,7 +226,6 @@ def _pull_image(self, image: str):
                 logger.warning("Unable to login to ecr: %s", e)
 
         self.client = docker.from_env()
-        print(self.client.images.list())
         try:
             logger.info("Pulling image %s from repository...", image)
             self.client.images.pull(image)
diff --git a/src/sagemaker/serve/mode/sagemaker_endpoint_mode.py b/src/sagemaker/serve/mode/sagemaker_endpoint_mode.py
index 24acfc6a2f..18fa33fad6 100644
--- a/src/sagemaker/serve/mode/sagemaker_endpoint_mode.py
+++ b/src/sagemaker/serve/mode/sagemaker_endpoint_mode.py
@@ -6,6 +6,7 @@
 import logging
 from typing import Type
 
+from sagemaker.serve.model_server.tei.server import SageMakerTeiServing
 from sagemaker.serve.model_server.tensorflow_serving.server import SageMakerTensorflowServing
 from sagemaker.session import Session
 from sagemaker.serve.utils.types import ModelServer
@@ -26,6 +27,7 @@ class SageMakerEndpointMode(
     SageMakerTgiServing,
     SageMakerMultiModelServer,
     SageMakerTensorflowServing,
+    SageMakerTeiServing,
 ):
     """Holds the required method to deploy a model to a SageMaker Endpoint"""
 
@@ -118,4 +120,12 @@ def prepare(
             image=image,
         )
 
+        if self.model_server == ModelServer.TEI:
+            return self._upload_tei_artifacts(
+                model_path=model_path,
+                sagemaker_session=sagemaker_session,
+                s3_model_data_url=s3_model_data_url,
+                image=image,
+            )
+
         raise ValueError("%s model server is not supported" % self.model_server)

From 7c7dce958e772034e0b9500c29e0751ad9f6cef1 Mon Sep 17 00:00:00 2001
From: Jonathan Makunga
Date: Fri, 17 May 2024 14:40:48 -0700
Subject: [PATCH 08/17] Notebook testing

---
 src/sagemaker/serve/model_server/tei/server.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sagemaker/serve/model_server/tei/server.py b/src/sagemaker/serve/model_server/tei/server.py
index 6579f1483c..605c95c2c0 100644
--- a/src/sagemaker/serve/model_server/tei/server.py
+++ b/src/sagemaker/serve/model_server/tei/server.py
@@ -131,7 +131,7 @@ def _upload_tei_artifacts(
 
         s3_location = s3_path_join("s3://", bucket, code_key_prefix, "code")
 
-        logger.debug("Uploading TGI Model Resources uncompressed to: %s", s3_location)
+        logger.debug("Uploading TEI Model Resources uncompressed to: %s", s3_location)
 
         model_data_url = S3Uploader.upload(
             str(code_dir),

From 9cfc643d224ff461c79c911d657adc41484e9a2a Mon Sep 17 00:00:00 2001
From: Jonathan Makunga
Date: Fri, 17 May 2024 14:49:19 -0700
Subject: [PATCH 09/17] Notebook testing

---
 src/sagemaker/serve/builder/tei_builder.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/sagemaker/serve/builder/tei_builder.py b/src/sagemaker/serve/builder/tei_builder.py
index f187cd0332..6aba3c9da2 100644
--- a/src/sagemaker/serve/builder/tei_builder.py
+++ b/src/sagemaker/serve/builder/tei_builder.py
@@ -180,7 +180,9 @@ def _tei_model_builder_deploy_wrapper(self, *args, **kwargs) -> Type[PredictorBa
         if "endpoint_logging" not in kwargs:
             kwargs["endpoint_logging"] = True
 
-        if not self.nb_instance_type and "instance_type" not in kwargs:
+        if self.nb_instance_type and "instance_type" not in kwargs:
+            kwargs.update({"instance_type": self.nb_instance_type})
+        elif not self.nb_instance_type and "instance_type" not in kwargs:
             raise ValueError(
                 "Instance type must be provided when deploying " "to SageMaker Endpoint mode."
             )

From 2044a7e933b469141a01ae8f40bd9c6ec9942717 Mon Sep 17 00:00:00 2001
From: Jonathan Makunga
Date: Fri, 17 May 2024 15:19:37 -0700
Subject: [PATCH 10/17] Refactoring

---
 .../serve/mode/sagemaker_endpoint_mode.py      | 18 +++++++++++-------
 .../serve/model_server/tei/prepare.py          |  0
 src/sagemaker/serve/model_server/tei/server.py |  2 +-
 src/sagemaker/serve/model_server/tei/utils.py  |  1 -
 4 files changed, 12 insertions(+), 9 deletions(-)
 delete mode 100644 src/sagemaker/serve/model_server/tei/prepare.py
 delete mode 100644 src/sagemaker/serve/model_server/tei/utils.py

diff --git a/src/sagemaker/serve/mode/sagemaker_endpoint_mode.py b/src/sagemaker/serve/mode/sagemaker_endpoint_mode.py
index 18fa33fad6..247ec90c42 100644
--- a/src/sagemaker/serve/mode/sagemaker_endpoint_mode.py
+++ b/src/sagemaker/serve/mode/sagemaker_endpoint_mode.py
@@ -68,8 +68,9 @@ def prepare(
                 + "session to be created or supply `sagemaker_session` into @serve.invoke."
             ) from e
 
+        upload_artifacts = None
         if self.model_server == ModelServer.TORCHSERVE:
-            return self._upload_torchserve_artifacts(
+            upload_artifacts = self._upload_torchserve_artifacts(
                 model_path=model_path,
                 sagemaker_session=sagemaker_session,
                 secret_key=secret_key,
@@ -78,7 +79,7 @@
             )
 
         if self.model_server == ModelServer.TRITON:
-            return self._upload_triton_artifacts(
+            upload_artifacts = self._upload_triton_artifacts(
                 model_path=model_path,
                 sagemaker_session=sagemaker_session,
                 secret_key=secret_key,
@@ -87,7 +88,7 @@
             )
 
         if self.model_server == ModelServer.DJL_SERVING:
-            return self._upload_djl_artifacts(
+            upload_artifacts = self._upload_djl_artifacts(
                 model_path=model_path,
                 sagemaker_session=sagemaker_session,
                 s3_model_data_url=s3_model_data_url,
@@ -95,7 +96,7 @@
             )
 
         if self.model_server == ModelServer.TGI:
-            return self._upload_tgi_artifacts(
+            upload_artifacts = self._upload_tgi_artifacts(
                 model_path=model_path,
                 sagemaker_session=sagemaker_session,
                 s3_model_data_url=s3_model_data_url,
@@ -104,7 +105,7 @@
             )
 
         if self.model_server == ModelServer.MMS:
-            return self._upload_server_artifacts(
+            upload_artifacts = self._upload_server_artifacts(
                 model_path=model_path,
                 sagemaker_session=sagemaker_session,
                 s3_model_data_url=s3_model_data_url,
@@ -112,7 +113,7 @@
             )
 
         if self.model_server == ModelServer.TENSORFLOW_SERVING:
-            return self._upload_tensorflow_serving_artifacts(
+            upload_artifacts = self._upload_tensorflow_serving_artifacts(
                 model_path=model_path,
                 sagemaker_session=sagemaker_session,
                 secret_key=secret_key,
@@ -121,11 +122,14 @@
             )
 
         if self.model_server == ModelServer.TEI:
-            return self._upload_tei_artifacts(
+            upload_artifacts = self._upload_tei_artifacts(
                 model_path=model_path,
                 sagemaker_session=sagemaker_session,
                 s3_model_data_url=s3_model_data_url,
                 image=image,
             )
 
+        if isinstance(self.model_server, ModelServer) and upload_artifacts:
+            return upload_artifacts
+
         raise ValueError("%s model server is not supported" % self.model_server)
diff --git a/src/sagemaker/serve/model_server/tei/prepare.py b/src/sagemaker/serve/model_server/tei/prepare.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/src/sagemaker/serve/model_server/tei/server.py b/src/sagemaker/serve/model_server/tei/server.py
index 605c95c2c0..67fca0e847 100644
--- a/src/sagemaker/serve/model_server/tei/server.py
+++ b/src/sagemaker/serve/model_server/tei/server.py
@@ -40,7 +40,7 @@ def _start_tei_serving(
             env_vars: Environment variables to set
         """
         if env_vars and secret_key:
-            env_vars['SAGEMAKER_SERVE_SECRET_KEY'] = secret_key
+            env_vars["SAGEMAKER_SERVE_SECRET_KEY"] = secret_key
 
         self.container = client.containers.run(
             image,
diff --git a/src/sagemaker/serve/model_server/tei/utils.py b/src/sagemaker/serve/model_server/tei/utils.py
deleted file mode 100644
index 01e3b29f6c..0000000000
--- a/src/sagemaker/serve/model_server/tei/utils.py
+++ /dev/null
@@ -1 +0,0 @@
-"""TEI ModelBuilder Utils"""
\ No newline at end of file

From af5fde2b6c676aba83f5ee58a6434049621f8431 Mon Sep 17 00:00:00 2001
From: Jonathan Makunga
Date: Mon, 20 May 2024 09:30:14 -0700
Subject: [PATCH 11/17] Refactoring

---
 src/sagemaker/serve/builder/model_builder.py        | 2 +-
 src/sagemaker/serve/mode/local_container_mode.py    | 7 ++++---
 src/sagemaker/serve/mode/sagemaker_endpoint_mode.py | 5 +++--
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/sagemaker/serve/builder/model_builder.py b/src/sagemaker/serve/builder/model_builder.py
index b24f30fb3e..1fe75065d5 100644
--- a/src/sagemaker/serve/builder/model_builder.py
+++ b/src/sagemaker/serve/builder/model_builder.py
@@ -169,7 +169,7 @@ class ModelBuilder(Triton, DJL, JumpStart, TGI, Transformers, TensorflowServing,
             in order for model builder to build the artifacts correctly (according
             to the model server). Possible values for this argument are
             ``TORCHSERVE``, ``MMS``, ``TENSORFLOW_SERVING``, ``DJL_SERVING``,
-            ``TRITON``, and``TGI``.
+            ``TRITON``, ``TGI``, and ``TEI``.
         model_metadata (Optional[Dict[str, Any]): Dictionary used to override
             model metadata. Currently, ``HF_TASK`` is overridable for HuggingFace model.
             HF_TASK should be set for new models without task metadata in the Hub, adding unsupported task types will throw
diff --git a/src/sagemaker/serve/mode/local_container_mode.py b/src/sagemaker/serve/mode/local_container_mode.py
index 8e1f48e593..cb28b37001 100644
--- a/src/sagemaker/serve/mode/local_container_mode.py
+++ b/src/sagemaker/serve/mode/local_container_mode.py
@@ -42,7 +42,6 @@ class LocalContainerMode(
     LocalTgiServing,
     LocalMultiModelServer,
     LocalTensorflowServing,
-    LocalTeiServing,
 ):
     """A class that holds methods to deploy model to a container in local environment"""
 
@@ -72,6 +71,8 @@ def __init__(
         self.secret_key = None
         self._ping_container = None
 
+        self._tei_serving = LocalTeiServing()
+
     def load(self, model_path: str = None):
         """Placeholder docstring"""
         path = Path(model_path if model_path else self.model_path)
@@ -159,14 +160,14 @@ def create_server(
             )
             self._ping_container = self._tensorflow_serving_deep_ping
         elif self.model_server == ModelServer.TEI:
-            self._start_tei_serving(
+            self._tei_serving._start_tei_serving(
                 client=self.client,
                 image=image,
                 model_path=model_path if model_path else self.model_path,
                 secret_key=secret_key,
                 env_vars=env_vars if env_vars else self.env_vars,
             )
-            self._ping_container = self._tei_deep_ping
+            self._ping_container = self._tei_serving._tei_deep_ping
 
         # allow some time for container to be ready
         time.sleep(10)
diff --git a/src/sagemaker/serve/mode/sagemaker_endpoint_mode.py b/src/sagemaker/serve/mode/sagemaker_endpoint_mode.py
index 247ec90c42..fb959de492 100644
--- a/src/sagemaker/serve/mode/sagemaker_endpoint_mode.py
+++ b/src/sagemaker/serve/mode/sagemaker_endpoint_mode.py
@@ -27,7 +27,6 @@ class SageMakerEndpointMode(
     SageMakerTgiServing,
     SageMakerMultiModelServer,
     SageMakerTensorflowServing,
-    SageMakerTeiServing,
 ):
     """Holds the required method to deploy a model to a SageMaker Endpoint"""
 
@@ -39,6 +38,8 @@ def __init__(self, inference_spec: Type[InferenceSpec], model_server: ModelServe
         self.inference_spec = inference_spec
         self.model_server = model_server
 
+        self._tei_serving = SageMakerTeiServing()
+
     def load(self, model_path: str):
         """Placeholder docstring"""
         path = Path(model_path)
@@ -122,7 +123,7 @@ def prepare(
         )
 
         if self.model_server == ModelServer.TEI:
-            upload_artifacts = self._upload_tei_artifacts(
+            upload_artifacts = self._tei_serving._upload_tei_artifacts(
                 model_path=model_path,
                 sagemaker_session=sagemaker_session,
                 s3_model_data_url=s3_model_data_url,

From 48a44e2863df6493698d76e461d428937496189b Mon Sep 17 00:00:00 2001
From: Jonathan Makunga
Date: Mon, 20 May 2024 11:20:10 -0700
Subject: [PATCH 12/17] UT

---
 .../serve/model_server/tei/__init__.py    |   0
 .../serve/model_server/tei/test_server.py | 109 ++++++++++++++++++
 2 files changed, 109 insertions(+)
 create mode 100644 tests/unit/sagemaker/serve/model_server/tei/__init__.py
 create mode 100644 tests/unit/sagemaker/serve/model_server/tei/test_server.py

diff --git a/tests/unit/sagemaker/serve/model_server/tei/__init__.py b/tests/unit/sagemaker/serve/model_server/tei/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/unit/sagemaker/serve/model_server/tei/test_server.py b/tests/unit/sagemaker/serve/model_server/tei/test_server.py
new file mode 100644
index 0000000000..4d51ed4290
--- /dev/null
+++ b/tests/unit/sagemaker/serve/model_server/tei/test_server.py
@@ -0,0 +1,109 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+from pathlib import PosixPath
+from unittest import TestCase
+from unittest.mock import Mock, patch
+
+from docker.types import DeviceRequest
+from sagemaker.serve.model_server.tei.server import LocalTeiServing, SageMakerTeiServing
+
+TEI_IMAGE = (
+    "246618743249.dkr.ecr.us-west-2.amazonaws.com/tei:2.0.1-tei1.2.3-gpu-py310-cu122-ubuntu22.04"
+)
+MODEL_PATH = "model_path"
+ENV_VAR = {"KEY": "VALUE"}
+PAYLOAD = {
+    "inputs": {
+        "sourceSentence": "How cute your dog is!",
+        "sentences": ["The mitochondria is the powerhouse of the cell.", "Your dog is so cute."],
+    }
+}
+S3_URI = "s3://mock_model_data_uri"
+SECRET_KEY = "secret_key"
+INFER_RESPONSE = []
+
+
+class TeiServerTests(TestCase):
+    @patch("sagemaker.serve.model_server.tei.server.requests")
+    def test_start_invoke_destroy_local_tei_server(self, mock_requests):
+        mock_container = Mock()
+        mock_docker_client = Mock()
+        mock_docker_client.containers.run.return_value = mock_container
+
+        local_tei_server = LocalTeiServing()
+        mock_schema_builder = Mock()
+        mock_schema_builder.input_serializer.serialize.return_value = PAYLOAD
+        local_tei_server.schema_builder = mock_schema_builder
+
+        local_tei_server._start_tei_serving(
+            client=mock_docker_client,
+            model_path=MODEL_PATH,
+            secret_key=SECRET_KEY,
+            image=TEI_IMAGE,
+            env_vars=ENV_VAR,
+        )
+
+        mock_docker_client.containers.run.assert_called_once_with(
+            TEI_IMAGE,
+            shm_size="2G",
+            device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
+            network_mode="host",
+            detach=True,
+            auto_remove=True,
+            volumes={PosixPath("model_path/code"): {"bind": "/opt/ml/model/", "mode": "rw"}},
+            environment={
+                "TRANSFORMERS_CACHE": "/opt/ml/model/",
+                "HUGGINGFACE_HUB_CACHE": "/opt/ml/model/",
+                "KEY": "VALUE",
+                "SAGEMAKER_SERVE_SECRET_KEY": "secret_key",
+            },
+        )
+
+        mock_response = Mock()
+        mock_requests.post.side_effect = lambda *args, **kwargs: mock_response
+        mock_response.content = INFER_RESPONSE
+
+        res = local_tei_server._invoke_tei_serving(
+            request=PAYLOAD, content_type="application/json", accept="application/json"
+        )
+
+        self.assertEqual(res, INFER_RESPONSE)
+
+    @patch("sagemaker.serve.model_server.tei.server.S3Uploader")
+    def test_upload_artifacts_sagemaker_tei_server(self, mock_uploader):
+        mock_session = Mock()
+        mock_uploader.upload.side_effect = (
+            lambda *args, **kwargs: "s3://sagemaker-us-west-2-123456789123/tei-2024-05-20-16-05-36-027/code"
+        )
+
+        s3_upload_path, env_vars = SageMakerTeiServing()._upload_tei_artifacts(
+            model_path=MODEL_PATH,
+            sagemaker_session=mock_session,
+            s3_model_data_url=S3_URI,
+            image=TEI_IMAGE,
+        )
+
+        mock_uploader.upload.assert_called_once()
+        self.assertEqual(
+            s3_upload_path,
+            {
+                "S3DataSource": {
+                    "CompressionType": "None",
+                    "S3DataType": "S3Prefix",
+                    "S3Uri": "s3://sagemaker-us-west-2-123456789123/tei-2024-05-20-16-05-36-027/code/",
+                }
+            },
+        )
+        self.assertIsNotNone(env_vars)

From 86739d340050a916b087e1ff55e2783f060a4572 Mon Sep 17 00:00:00 2001
From: Jonathan Makunga
Date: Mon, 20 May 2024 12:56:36 -0700
Subject: [PATCH 13/17] UT

---
 tests/unit/sagemaker/serve/builder/test_tei_builder.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/unit/sagemaker/serve/builder/test_tei_builder.py b/tests/unit/sagemaker/serve/builder/test_tei_builder.py
index 79a8f23324..4a75174bfc 100644
--- a/tests/unit/sagemaker/serve/builder/test_tei_builder.py
+++ b/tests/unit/sagemaker/serve/builder/test_tei_builder.py
@@ -18,7 +18,7 @@
 from sagemaker.serve.mode.function_pointers import Mode
 from tests.unit.sagemaker.serve.constants import MOCK_VPC_CONFIG
 
-from sagemaker.serve.utils.predictors import TgiLocalModePredictor
+from sagemaker.serve.utils.predictors import TeiLocalModePredictor
 
 mock_model_id = "bert-base-uncased"
 mock_prompt = "The man worked as a [MASK]."
@@ -96,7 +96,7 @@ def test_build_deploy_for_tei_local_container_and_remote_container(
         assert model.vpc_config == MOCK_VPC_CONFIG
         assert builder.env_vars["MODEL_LOADING_TIMEOUT"] == "1800"
-        assert isinstance(predictor, TgiLocalModePredictor)
+        assert isinstance(predictor, TeiLocalModePredictor)
 
         assert builder.nb_instance_type == "ml.g5.24xlarge"
 
 
@@ -139,7 +139,7 @@ def test_image_uri_override(
         assert builder.image_uri == MOCK_IMAGE_CONFIG
         assert builder.env_vars["MODEL_LOADING_TIMEOUT"] == "1800"
-        assert isinstance(predictor, TgiLocalModePredictor)
+        assert isinstance(predictor, TeiLocalModePredictor)
 
         assert builder.nb_instance_type == "ml.g5.24xlarge"
 
 

From b9a850c75a97e9c68c6f850ff36063eb5f5951be Mon Sep 17 00:00:00 2001
From: Jonathan Makunga
Date: Tue, 21 May 2024 08:56:05 -0700
Subject: [PATCH 14/17] Refactoring

---
 .../serve/mode/local_container_mode.py        |  1 +
 .../serve/mode/sagemaker_endpoint_mode.py     |  2 +-
 tests/integ/sagemaker/serve/test_serve_tei.py | 51 +++++--------
 3 files changed, 16 insertions(+), 38 deletions(-)

diff --git a/src/sagemaker/serve/mode/local_container_mode.py b/src/sagemaker/serve/mode/local_container_mode.py
index cb28b37001..5be5d5fc15 100644
--- a/src/sagemaker/serve/mode/local_container_mode.py
+++ b/src/sagemaker/serve/mode/local_container_mode.py
@@ -167,6 +167,7 @@ def create_server(
                 secret_key=secret_key,
                 env_vars=env_vars if env_vars else self.env_vars,
             )
+            self.container = self._tei_serving.container
             self._ping_container = self._tei_serving._tei_deep_ping
 
         # allow some time for container to be ready
diff --git a/src/sagemaker/serve/mode/sagemaker_endpoint_mode.py b/src/sagemaker/serve/mode/sagemaker_endpoint_mode.py
index fb959de492..b8f1d0529b 100644
--- a/src/sagemaker/serve/mode/sagemaker_endpoint_mode.py
+++ b/src/sagemaker/serve/mode/sagemaker_endpoint_mode.py
@@ -130,7 +130,7 @@ def prepare(
                 image=image,
             )
 
-        if isinstance(self.model_server, ModelServer) and upload_artifacts:
+        if upload_artifacts:
             return upload_artifacts
 
         raise ValueError("%s model server is not supported" % self.model_server)
diff --git a/tests/integ/sagemaker/serve/test_serve_tei.py b/tests/integ/sagemaker/serve/test_serve_tei.py
index 19ee0b57de..ac8e5c8eee 100644
--- a/tests/integ/sagemaker/serve/test_serve_tei.py
+++ b/tests/integ/sagemaker/serve/test_serve_tei.py
@@ -29,46 +29,26 @@
 logger = logging.getLogger(__name__)
 
 sample_input = {
-    "inputs": "The man worked as a [MASK].",
+    "inputs": {
+        "sourceSentence": "How cute your dog is!",
+        "sentences": ["The mitochondria is the powerhouse of the cell.", "Your dog is so cute."],
+    }
 }
 
-loaded_response = [
-    {
-        "score": 0.0974755585193634,
-        "token": 10533,
-        "token_str": "carpenter",
-        "sequence": "the man worked as a carpenter.",
-    },
-    {
-        "score": 0.052383411675691605,
-        "token": 15610,
-        "token_str": "waiter",
-        "sequence": "the man worked as a waiter.",
-    },
-    {
-        "score": 0.04962712526321411,
-        "token": 13362,
-        "token_str": "barber",
-        "sequence": "the man worked as a barber.",
-    },
-    {
-        "score": 0.0378861166536808,
-        "token": 15893,
-        "token_str": "mechanic",
-        "sequence": "the man worked as a mechanic.",
-    },
-    {
-        "score": 0.037680838257074356,
-        "token": 18968,
-        "token_str": "salesman",
-        "sequence": "the man worked as a salesman.",
-    },
-]
+loaded_response = []
 
 
 @pytest.fixture
 def model_input():
-    return {"inputs": "The man worked as a [MASK]."}
+    return {
+        "inputs": {
+            "sourceSentence": "How cute your dog is!",
+            "sentences": [
+                "The mitochondria is the powerhouse of the cell.",
+                "Your dog is so cute.",
+            ],
+        }
+    }
 
 
 @pytest.fixture
@@ -77,9 +57,6 @@ def model_builder_model_schema_builder():
     return ModelBuilder(
         model_path=HF_DIR,
        model="BAAI/bge-m3",
         schema_builder=SchemaBuilder(sample_input, loaded_response),
-        model_metadata={
-            "HF_TASK": "sentence-similarity",
-        },
     )
 

From 04695de53f64fb7cdcd3b6f4e31ba7b7e697051e Mon Sep 17 00:00:00 2001
From: Jonathan Makunga
Date: Tue, 21 May 2024 10:08:44 -0700
Subject: [PATCH 15/17] Test coverage

---
 .../serve/model_server/tei/test_server.py | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/unit/sagemaker/serve/model_server/tei/test_server.py b/tests/unit/sagemaker/serve/model_server/tei/test_server.py
index 4d51ed4290..5c69af3430 100644
--- a/tests/unit/sagemaker/serve/model_server/tei/test_server.py
+++ b/tests/unit/sagemaker/serve/model_server/tei/test_server.py
@@ -81,6 +81,30 @@ def test_start_invoke_destroy_local_tei_server(self, mock_requests):
 
         self.assertEqual(res, INFER_RESPONSE)
 
+    def test_tei_deep_ping(self):
+        mock_predictor = Mock()
+        mock_response = Mock()
+        mock_schema_builder = Mock()
+
+        mock_predictor.predict.side_effect = lambda *args, **kwargs: mock_response
+        mock_schema_builder.sample_input = PAYLOAD
+
+        local_tei_server = LocalTeiServing()
+        local_tei_server.schema_builder = mock_schema_builder
+        res = local_tei_server._tei_deep_ping(mock_predictor)
+
+        self.assertEqual(res, (True, mock_response))
+
+    def test_tei_deep_ping_ex(self):
+        mock_predictor = Mock()
+
+        mock_predictor.predict.side_effect = lambda *args, **kwargs: Exception()
+
+        local_tei_server = LocalTeiServing()
+        res = local_tei_server._tei_deep_ping(mock_predictor)
+
+        self.assertEqual(res, (False, None))
+
     @patch("sagemaker.serve.model_server.tei.server.S3Uploader")
     def test_upload_artifacts_sagemaker_tei_server(self, mock_uploader):
         mock_session = Mock()

From 0ad32b85608501d60fe0046c4e0a113ad9380965 Mon Sep 17 00:00:00 2001
From: Jonathan Makunga
Date: Tue, 21 May 2024 11:55:49 -0700
Subject: [PATCH 16/17] Refactoring

---
 .../serve/mode/local_container_mode.py        | 11 ++++++-----
 src/sagemaker/serve/utils/predictors.py      |  2 +-
 tests/integ/sagemaker/serve/test_serve_tei.py | 17 ++---------------
 .../serve/model_server/tei/test_server.py     | 17 +++++++++++++++++
 4 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/src/sagemaker/serve/mode/local_container_mode.py b/src/sagemaker/serve/mode/local_container_mode.py
index 5be5d5fc15..7c34b21328 100644
--- a/src/sagemaker/serve/mode/local_container_mode.py
+++ b/src/sagemaker/serve/mode/local_container_mode.py
@@ -70,8 +70,7 @@ def __init__(
         self.container = None
         self.secret_key = None
         self._ping_container = None
-
-        self._tei_serving = LocalTeiServing()
+        self._invoke_serving = None
 
     def load(self, model_path: str = None):
         """Placeholder docstring"""
@@ -160,15 +159,17 @@ def create_server(
             )
             self._ping_container = self._tensorflow_serving_deep_ping
         elif self.model_server == ModelServer.TEI:
-            self._tei_serving._start_tei_serving(
+            tei_serving = LocalTeiServing()
+            tei_serving._start_tei_serving(
                 client=self.client,
                 image=image,
                 model_path=model_path if model_path else self.model_path,
                 secret_key=secret_key,
                 env_vars=env_vars if env_vars else self.env_vars,
             )
-            self.container = self._tei_serving.container
-            self._ping_container = self._tei_serving._tei_deep_ping
+            tei_serving.schema_builder = self.schema_builder
+            self._ping_container = tei_serving._tei_deep_ping
+            self._invoke_serving = tei_serving._invoke_tei_serving
 
         # allow some time for container to be ready
         time.sleep(10)
diff --git a/src/sagemaker/serve/utils/predictors.py b/src/sagemaker/serve/utils/predictors.py
index e79d4a8fd6..25a995eb48 100644
--- a/src/sagemaker/serve/utils/predictors.py
+++ b/src/sagemaker/serve/utils/predictors.py
@@ -227,7 +227,7 @@ def predict(self, data):
         return [
             self.deserializer.deserialize(
                 io.BytesIO(
-                    self._mode_obj._invoke_tei_serving(
+                    self._mode_obj._invoke_serving(
                         self.serializer.serialize(data),
                         self.content_type,
                         self.deserializer.ACCEPT[0],
diff --git a/tests/integ/sagemaker/serve/test_serve_tei.py b/tests/integ/sagemaker/serve/test_serve_tei.py
index ac8e5c8eee..5cf1a3635c 100644
--- a/tests/integ/sagemaker/serve/test_serve_tei.py
+++ b/tests/integ/sagemaker/serve/test_serve_tei.py
@@ -28,27 +28,14 @@
 
 logger = logging.getLogger(__name__)
 
-sample_input = {
-    "inputs": {
-        "sourceSentence": "How cute your dog is!",
-        "sentences": ["The mitochondria is the powerhouse of the cell.", "Your dog is so cute."],
-    }
-}
+sample_input = {"inputs": "What is Deep Learning?"}
 
 loaded_response = []
 
 
 @pytest.fixture
 def model_input():
-    return {
-        "inputs": {
-            "sourceSentence": "How cute your dog is!",
-            "sentences": [
-                "The mitochondria is the powerhouse of the cell.",
-                "Your dog is so cute.",
-            ],
-        }
-    }
+    return {"inputs": "What is Deep Learning?"}
 
 
 @pytest.fixture
diff --git a/tests/unit/sagemaker/serve/model_server/tei/test_server.py b/tests/unit/sagemaker/serve/model_server/tei/test_server.py
index 5c69af3430..16dcf12b5a 100644
--- a/tests/unit/sagemaker/serve/model_server/tei/test_server.py
+++ b/tests/unit/sagemaker/serve/model_server/tei/test_server.py
@@ -18,6 +18,7 @@
 
 from docker.types import DeviceRequest
 from sagemaker.serve.model_server.tei.server import LocalTeiServing, SageMakerTeiServing
+from sagemaker.serve.utils.exceptions import LocalModelInvocationException
 
 TEI_IMAGE = (
     "246618743249.dkr.ecr.us-west-2.amazonaws.com/tei:2.0.1-tei1.2.3-gpu-py310-cu122-ubuntu22.04"
@@ -95,6 +96,22 @@ def test_tei_deep_ping(self):
 
         self.assertEqual(res, (True, mock_response))
 
+    def test_tei_deep_ping_invoke_ex(self):
+        mock_predictor = Mock()
+        mock_schema_builder = Mock()
+
+        mock_predictor.predict.side_effect = lambda *args, **kwargs: exec(
+            'raise(ValueError("422 Client Error: Unprocessable Entity for url:"))'
+        )
+        mock_schema_builder.sample_input = PAYLOAD
+
+        local_tei_server = LocalTeiServing()
+        local_tei_server.schema_builder = mock_schema_builder
+
+        self.assertRaises(
+            LocalModelInvocationException, lambda: local_tei_server._tei_deep_ping(mock_predictor)
+        )
+
     def test_tei_deep_ping_ex(self):
         mock_predictor = Mock()
 

From 6c45017ff9ae2e6de65bcf0ba2eb04f188e3513a Mon Sep 17 00:00:00 2001
From: Jonathan Makunga
Date: Tue, 21 May 2024 12:14:31 -0700
Subject: [PATCH 17/17] Refactoring

---
 src/sagemaker/serve/mode/local_container_mode.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/sagemaker/serve/mode/local_container_mode.py b/src/sagemaker/serve/mode/local_container_mode.py
index 7c34b21328..f040c61c1d 100644
--- a/src/sagemaker/serve/mode/local_container_mode.py
+++ b/src/sagemaker/serve/mode/local_container_mode.py
@@ -168,6 +168,7 @@ def create_server(
                 env_vars=env_vars if env_vars else self.env_vars,
             )
             tei_serving.schema_builder = self.schema_builder
+            self.container = tei_serving.container
             self._ping_container = tei_serving._tei_deep_ping
             self._invoke_serving = tei_serving._invoke_tei_serving
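
Taken together, the series wires ModelServer.TEI through both deployment modes: in
LOCAL_CONTAINER mode the container is managed by LocalTeiServing and invoked through the
generic _invoke_serving hook, while SAGEMAKER_ENDPOINT deployments upload uncompressed
artifacts via SageMakerTeiServing. A minimal sketch of how the new path would be driven,
mirroring the integration test above; the model ID, sample input, and the local GPU/Docker
environment are illustrative assumptions, not values fixed by these patches:

    from sagemaker.serve.builder.model_builder import ModelBuilder
    from sagemaker.serve.builder.schema_builder import SchemaBuilder
    from sagemaker.serve.mode.function_pointers import Mode
    from sagemaker.serve.utils.types import ModelServer

    # Sample request/response pair from which SchemaBuilder derives the
    # (de)serialization; values taken from the integration test in PATCH 16.
    sample_input = {"inputs": "What is Deep Learning?"}
    loaded_response = []

    builder = ModelBuilder(
        model="BAAI/bge-m3",  # illustrative embedding model, as in the integ test
        schema_builder=SchemaBuilder(sample_input, loaded_response),
        model_server=ModelServer.TEI,  # enum value 7, added in PATCH 01
        mode=Mode.LOCAL_CONTAINER,  # served locally by LocalTeiServing
    )

    model = builder.build()
    predictor = model.deploy()  # a TeiLocalModePredictor in local mode (PATCH 02)
    embeddings = predictor.predict(sample_input)
    predictor.delete_predictor()  # stops and removes the local container

Switching to Mode.SAGEMAKER_ENDPOINT (with an instance_type passed through to deploy) would
instead exercise SageMakerTeiServing._upload_tei_artifacts, which uploads the code directory
uncompressed and points the endpoint at the resulting S3 prefix.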