Clarifai · phatvo9 · Nov 22, 2023 · Nov 24, 2023 · Nov 27, 2023 · Nov 28, 2023
diff --git a/clarifai/models/api.py b/clarifai/models/api.py
@@ -18,8 +18,8 @@
 from google.protobuf.json_format import MessageToDict
 from google.protobuf.struct_pb2 import Struct, Value
 
-from clarifai.auth.helper import ClarifaiAuthHelper
 from clarifai.client import create_stub
+from clarifai.client.auth.helper import ClarifaiAuthHelper
 
 
 def _make_default_value_proto(dtype, value):

diff --git a/clarifai/models/model_serving/README.md b/clarifai/models/model_serving/README.md
@@ -81,7 +81,6 @@ A generated triton model repository looks as illustrated in the directory tree a
 | `config.pbtxt` | Contains the triton model configuration used by the triton inference server to guide inference requests processing. |
 | `requirements.txt` | Contains dependencies needed by a user model to successfully make predictions.|
 | `labels.txt` | Contains labels listed one per line, a model is trained to predict. The order of labels should match the model predicted class indexes. |
-| `triton_conda.yaml` | Contains dependencies available in pre-configured execution environment. |
 | `1/inference.py` | The inference script where users write their inference code. |
 | `1/model.py` | The triton python backend model file run to serve inference requests. |
 | `1/test.py` | Contains some predefined tests in order to test inference implementation and dependencies locally. |
@@ -97,7 +96,11 @@ This script is composed of a single class that contains a default init method an
 
 import os
 from pathlib import Path
-from typing import Callable
+
+from clarifai.models.model_serving.model_config import (ModelTypes, get_model_config)
+
+config = get_model_config("clarifai-model-type") # Input your model type
+
 
 class InferenceModel:
   """User model inference class."""
@@ -112,29 +115,32 @@ class InferenceModel:
     #self.checkpoint_path: Path = os.path.join(self.base_path, "your checkpoint filename/path")
     #self.model: Callable = <load_your_model_here from checkpoint or folder>
 
-  #Add relevant model type decorator to the method below (see docs/model_types for ref.)
-  def get_predictions(self, input_data, **kwargs):
+  @config.inference.wrap_func
+  def get_predictions(self, input_data: list, **kwargs) -> list:
     """
     Main model inference method.
 
     Args:
     -----
-      input_data: A single input data item to predict on.
+      input_data: A list of input data items to predict on.
         Input data can be an image or text, etc depending on the model type.
 
+      **kwargs: your inference parameters.
+
     Returns:
     --------
-      One of the clarifai.models.model_serving.models.output types. Refer to the README/docs
+      List of one of the `clarifai.models.model_serving.models.output` types or `config.inference.return_type(your_output)`. Refer to the README/docs
     """
+
     # Delete/Comment out line below and add your inference code
     raise NotImplementedError()
 ```
 
 - `__init__()` used for one-time loading of inference time artifacts such as models, tokenizers, etc that are frequently called during inference to improve inference speed.
 
-- `get_predictions()` takes an input data item whose type depends on the task the model solves, & returns predictions for an input data item.
+- `get_predictions()` takes a list of input data items whose type depends on the task the model solves, & returns a list of predictions.
 
-`get_predictions()` should return any of the output types defined under [output](docs/output.md) and the predict function MUST be decorated with a task corresponding [model type decorator](docs/model_types.md). The model type decorators are responsible for passing input request batches for prediction and formatting the resultant predictions into triton inference responses.
+`get_predictions()` should return a list of any of the output types defined under [output](docs/output.md), and it MUST be decorated with `@config.inference.wrap_func`, where `config` is obtained from `get_model_config()` for the task's [model type](docs/model_types.md). The model type decorators are responsible for passing input request batches for prediction and formatting the resultant predictions into triton inference responses.
 
 Additional methods can be added to this script's `Infer` class by the user as deemed necessary for their model inference provided they are invoked inside `get_predictions()` if used at inference time.
 

diff --git a/clarifai/models/model_serving/cli/deploy_cli.py b/clarifai/models/model_serving/cli/deploy_cli.py
@@ -13,7 +13,7 @@
 """Commandline interface for model upload utils."""
 import argparse
 
-from clarifai.auth.helper import ClarifaiAuthHelper
+from clarifai.client.auth.helper import ClarifaiAuthHelper
 from clarifai.models.api import Models
 from clarifai.models.model_serving.model_config import MODEL_TYPES, get_model_config
 from clarifai.models.model_serving.model_config.inference_parameter import InferParamManager

diff --git a/clarifai/models/model_serving/examples/image_classification/README.md b/clarifai/models/model_serving/examples/image_classification/README.md
@@ -6,4 +6,7 @@ These can be used on the fly with minimal or no changes to test deploy image cla
 
 	Required files to run tests locally:
 
-	* Download the [model checkpoint from huggingface](https://huggingface.co/nateraw/vit-age-classifier/tree/main) and store it under `age_vit/1/vit-age-classifier/`
+	* Download the [model checkpoint from huggingface](https://huggingface.co/nateraw/vit-age-classifier/tree/main) and store it under `age_vit/1/checkpoint/`
+	```
+	huggingface-cli download nateraw/vit-age-classifier --local-dir age_vit/1/checkpoint/ --local-dir-use-symlinks False
+	```
diff --git a/clarifai/models/model_serving/examples/image_classification/age_vit/1/inference.py b/clarifai/models/model_serving/examples/image_classification/age_vit/1/inference.py
@@ -13,11 +13,13 @@
 
 import torch
 from scipy.special import softmax
-from transformers import ViTFeatureExtractor, ViTForImageClassification
+from transformers import AutoImageProcessor, ViTForImageClassification
 
-from clarifai.models.model_serving.models.model_types import visual_classifier
+from clarifai.models.model_serving.model_config import ModelTypes, get_model_config
 from clarifai.models.model_serving.models.output import ClassifierOutput
 
+config = get_model_config(ModelTypes.visual_classifier)
+
 
 class InferenceModel:
   """User model inference class."""
@@ -28,29 +30,35 @@ def __init__(self) -> None:
     in this method so they are loaded only once for faster inference.
     """
     self.base_path: Path = os.path.dirname(__file__)
-    self.huggingface_model_path: Path = os.path.join(self.base_path, "vit-age-classifier")
-    self.transforms = ViTFeatureExtractor.from_pretrained(self.huggingface_model_path)
+    self.huggingface_model_path: Path = os.path.join(self.base_path, "checkpoint")
+    self.transforms = AutoImageProcessor.from_pretrained(self.huggingface_model_path)
     self.model: Callable = ViTForImageClassification.from_pretrained(self.huggingface_model_path)
     self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
-  @visual_classifier
-  def get_predictions(self, input_data) -> ClassifierOutput:
+  @config.inference.wrap_func
+  def get_predictions(self, input_data: list, **kwargs) -> list:
     """
     Main model inference method.
 
     Args:
     -----
-      input_data: A single input data item to predict on.
+      input_data: A list of input data item to predict on.
         Input data can be an image or text, etc depending on the model type.
 
+      **kwargs: your inference parameters.
+
     Returns:
     --------
-      One of the clarifai.models.model_serving.models.output types. Refer to the README/docs
+      List of one of the `clarifai.models.model_serving.models.output types` or `config.inference.return_type(your_output)`. Refer to the README/docs
     """
     # Transform image and pass it to the model
     inputs = self.transforms(input_data, return_tensors='pt')
-    output = self.model(**inputs)
-    pred_scores = softmax(
-        output[0][0].detach().numpy())  # alt: softmax(output.logits[0].detach().numpy())
+    with torch.no_grad():
+      preds = self.model(**inputs).logits
+    outputs = []
+    for pred in preds:
+      pred_scores = softmax(
+          pred.detach().numpy())  # alt: softmax(output.logits[0].detach().numpy())
+      outputs.append(ClassifierOutput(predicted_scores=pred_scores))
 
-    return ClassifierOutput(predicted_scores=pred_scores)
+    return outputs
diff --git a/clarifai/models/model_serving/examples/image_classification/age_vit/1/model.py b/clarifai/models/model_serving/examples/image_classification/age_vit/1/model.py
@@ -21,6 +21,7 @@
   pass
 from google.protobuf import text_format
 from tritonclient.grpc.model_config_pb2 import ModelConfig
+from clarifai.models.model_serving.model_config.inference_parameter import parse_req_parameters
 
 
 class TritonPythonModel:
@@ -37,14 +38,13 @@ def initialize(self, args):
     from inference import InferenceModel
 
     self.inference_obj = InferenceModel()
-    self.device = "cuda:0" if "GPU" in args["model_instance_kind"] else "cpu"
 
     # Read input_name from config file
     self.config_msg = ModelConfig()
     with open(os.path.join(args["model_repository"], "config.pbtxt"), "r") as f:
       cfg = f.read()
     text_format.Merge(cfg, self.config_msg)
-    self.input_name = [inp.name for inp in self.config_msg.input][0]
+    self.input_names = [inp.name for inp in self.config_msg.input]
 
   def execute(self, requests):
     """
@@ -53,9 +53,22 @@ def execute(self, requests):
     responses = []
 
     for request in requests:
-      in_batch = pb_utils.get_input_tensor_by_name(request, self.input_name)
-      in_batch = in_batch.as_numpy()
-      inference_response = self.inference_obj.get_predictions(in_batch)
+      parameters = request.parameters()
+      parameters = parse_req_parameters(parameters) if parameters else {}
+
+      if len(self.input_names) == 1:
+        in_batch = pb_utils.get_input_tensor_by_name(request, self.input_names[0])
+        in_batch = in_batch.as_numpy()
+        inference_response = self.inference_obj.get_predictions(in_batch, **parameters)
+      else:
+        multi_in_batch_dict = {}
+        for input_name in self.input_names:
+          in_batch = pb_utils.get_input_tensor_by_name(request, input_name)
+          in_batch = in_batch.as_numpy() if in_batch is not None else []
+          multi_in_batch_dict.update({input_name: in_batch})
+
+        inference_response = self.inference_obj.get_predictions(multi_in_batch_dict, **parameters)
+
       responses.append(inference_response)
 
     return responses
diff --git a/clarifai/models/model_serving/examples/image_classification/age_vit/requirements.txt b/clarifai/models/model_serving/examples/image_classification/age_vit/requirements.txt
@@ -1,4 +1,4 @@
-clarifai>9.5.3 # for model upload features
+clarifai>9.10.5
 tritonclient[all]
 torch==1.13.1
 transformers==4.30.2

diff --git a/clarifai/models/model_serving/examples/multimodal_embedder/README.md b/clarifai/models/model_serving/examples/multimodal_embedder/README.md
@@ -0,0 +1,12 @@
+## Multimodal Embedder Triton Model Examples
+
+These can be used on the fly with minimal or no changes to test deploy multimodal embedder models to the Clarifai platform. See the required files section for each model below.
+
+* ### [CLIP](./clip/)
+
+	Required files to run tests locally:
+
+	* Download the [model checkpoint from huggingface](https://huggingface.co/openai/clip-vit-base-patch32) and store it under `clip/1/checkpoint/`
+	```
+	huggingface-cli download openai/clip-vit-base-patch32 --local-dir clip/1/checkpoint/ --local-dir-use-symlinks False --exclude "*.msgpack" "*.h5"
+	```
diff --git a/clarifai/models/model_serving/examples/multimodal_embedder/clip/1/__init__.py b/clarifai/models/model_serving/examples/multimodal_embedder/clip/1/__init__.py
diff --git a/clarifai/models/model_serving/examples/multimodal_embedder/clip/1/inference.py b/clarifai/models/model_serving/examples/multimodal_embedder/clip/1/inference.py
@@ -0,0 +1,66 @@
+# This file contains boilerplate code to allow users write their model
+# inference code that will then interact with the Triton Inference Server
+# Python backend to serve end user requests.
+# The module name, module path, class name & get_predictions() method names MUST be maintained as is
+# but other methods may be added within the class as deemed fit provided
+# they are invoked within the main get_predictions() inference method
+# if they play a role in any step of model inference
+"""User model inference script."""
+
+import os
+from pathlib import Path
+
+import torch
+from transformers import CLIPModel, CLIPProcessor
+from clarifai.models.model_serving.model_config import ModelTypes, get_model_config
+
+config = get_model_config(ModelTypes.multimodal_embedder)
+
+
+class InferenceModel:
+  """User model inference class for the CLIP multimodal embedder example."""
+
+  def __init__(self) -> None:
+    """
+    Load the CLIP model and processor once from the `checkpoint` folder next to this file,
+    so they are reused across requests for faster inference.
+    """
+    self.base_path: Path = os.path.dirname(__file__)
+    # Weights and preprocessing config are both read from the same local folder.
+    checkpoint_path = os.path.join(self.base_path, "checkpoint")
+    self.model = CLIPModel.from_pretrained(checkpoint_path)
+    self.model.eval()
+    self.processor = CLIPProcessor.from_pretrained(checkpoint_path)
+
+  @config.inference.wrap_func
+  def get_predictions(self, input_data, **kwargs):
+    """
+    Main model inference method.
+
+    Args:
+    -----
+      input_data: A list of input items, each indexed by "image" and "text".
+        An item is embedded from its text when "text" is not None, otherwise from its image.
+
+      **kwargs: your inference parameters (not used by this example).
+
+    Returns:
+    --------
+      List of `config.inference.return_type` outputs, one embedding vector per input item.
+    """
+    outputs = []
+    for inp in input_data:
+      image, text = inp["image"], inp["text"]
+      # Preprocess only the modality that is actually embedded, so that no
+      # input goes through the processor twice.
+      with torch.no_grad():
+        if text is not None:
+          inputs = self.processor(text=text, return_tensors="pt", padding=True)
+          embeddings = self.model.get_text_features(**inputs)
+        else:
+          inputs = self.processor(images=image, return_tensors="pt", padding=True)
+          embeddings = self.model.get_image_features(**inputs)
+      embeddings = embeddings.squeeze().cpu().numpy()
+      outputs.append(config.inference.return_type(embedding_vector=embeddings))
+
+    return outputs
diff --git a/clarifai/models/model_serving/examples/multimodal_embedder/clip/1/model.py b/clarifai/models/model_serving/examples/multimodal_embedder/clip/1/model.py
@@ -0,0 +1,74 @@
+# Copyright 2023 Clarifai, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Triton inference server Python Backend Model."""
+
+import os
+import sys
+
+try:
+  import triton_python_backend_utils as pb_utils
+except ModuleNotFoundError:
+  pass
+from google.protobuf import text_format
+from tritonclient.grpc.model_config_pb2 import ModelConfig
+from clarifai.models.model_serving.model_config.inference_parameter import parse_req_parameters
+
+
+class TritonPythonModel:
+  """
+  Triton Python BE Model.
+  """
+
+  def initialize(self, args):
+    """
+    Triton server init.
+    """
+    args["model_repository"] = args["model_repository"].replace("/1/model.py", "")  # drop "/1/model.py"; config.pbtxt is opened from the resulting folder below
+    sys.path.append(os.path.dirname(__file__))  # make this file's folder importable for the `inference` import below
+    from inference import InferenceModel
+
+    self.inference_obj = InferenceModel()
+
+    # Read the names of all inputs declared in the model's config.pbtxt
+    self.config_msg = ModelConfig()
+    with open(os.path.join(args["model_repository"], "config.pbtxt"), "r") as f:
+      cfg = f.read()
+    text_format.Merge(cfg, self.config_msg)
+    self.input_names = [inp.name for inp in self.config_msg.input]
+
+  def execute(self, requests):
+    """
+    Serve model inference requests.
+    """
+    responses = []
+
+    for request in requests:
+      parameters = request.parameters()
+      parameters = parse_req_parameters(parameters) if parameters else {}
+      # One declared input: pass its array directly; several: pass a dict keyed by input name.
+      if len(self.input_names) == 1:
+        in_batch = pb_utils.get_input_tensor_by_name(request, self.input_names[0])
+        in_batch = in_batch.as_numpy()  # NOTE(review): no None check here, unlike the multi-input branch below
+        inference_response = self.inference_obj.get_predictions(in_batch, **parameters)
+      else:
+        multi_in_batch_dict = {}
+        for input_name in self.input_names:
+          in_batch = pb_utils.get_input_tensor_by_name(request, input_name)
+          in_batch = in_batch.as_numpy() if in_batch is not None else []  # a missing tensor becomes an empty list
+          multi_in_batch_dict.update({input_name: in_batch})
+
+        inference_response = self.inference_obj.get_predictions(multi_in_batch_dict, **parameters)
+
+      responses.append(inference_response)
+
+    return responses