From 88b68afd5f10dad1457cb3756e79a8cda45e5d15 Mon Sep 17 00:00:00 2001 From: Matthew Zeiler Date: Mon, 7 Oct 2024 12:25:34 -0400 Subject: [PATCH] Revert "Remove old triton upload" --- README.md | 16 +- model_upload/README.md | 19 ++ model_upload/multimodal_embedder/README.md | 39 +++ .../clip/clarifai_config.yaml | 31 +++ .../multimodal_embedder/clip/inference.py | 62 +++++ .../multimodal_embedder/clip/requirements.txt | 4 + model_upload/multimodal_embedder/clip/test.py | 40 +++ model_upload/text_classifier/README.md | 39 +++ .../xlm-roberta/clarifai_config.yaml | 13 + .../text_classifier/xlm-roberta/inference.py | 56 ++++ .../xlm-roberta/requirements.txt | 7 + .../text_classifier/xlm-roberta/test.py | 40 +++ model_upload/text_embedder/README.md | 37 +++ .../instructor-xl/clarifai_config.yaml | 10 + .../text_embedder/instructor-xl/inference.py | 57 ++++ .../instructor-xl/requirements.txt | 7 + .../text_embedder/instructor-xl/test.py | 40 +++ model_upload/text_to_image/README.md | 36 +++ .../sd-v1.5/clarifai_config.yaml | 22 ++ .../text_to_image/sd-v1.5/inference.py | 62 +++++ .../text_to_image/sd-v1.5/requirements.txt | 6 + model_upload/text_to_image/sd-v1.5/test.py | 47 ++++ model_upload/text_to_text/README.md | 65 +++++ .../hf-model/clarifai_config.yaml | 30 +++ .../text_to_text/hf-model/inference.py | 74 ++++++ .../text_to_text/hf-model/requirements.txt | 11 + model_upload/text_to_text/hf-model/test.py | 47 ++++ .../vllm-model/clarifai_config.yaml | 26 ++ .../text_to_text/vllm-model/inference.py | 55 ++++ .../text_to_text/vllm-model/requirements.txt | 5 + model_upload/text_to_text/vllm-model/test.py | 41 +++ model_upload/visual_classsifier/README.md | 35 +++ .../age_vit/clarifai_config.yaml | 19 ++ .../visual_classsifier/age_vit/inference.py | 52 ++++ .../age_vit/requirements.txt | 7 + .../visual_classsifier/age_vit/test.py | 40 +++ model_upload/visual_detector/Readme.md | 87 +++++++ .../clarifai_config.yaml | 101 ++++++++ .../faster-rcnn_torchserve/inference.py | 68 +++++ .../model_store/hub/checkpoints/keep | 0 .../faster-rcnn_torchserve/requirements.txt | 3 + .../faster-rcnn_torchserve/test.py | 36 +++ .../yolof/clarifai_config.yaml | 90 +++++++ .../yolof/config/yolof_r50_c5_8x8_1x_coco.py | 245 ++++++++++++++++++ .../visual_detector/yolof/inference.py | 62 +++++ .../visual_detector/yolof/requirements.txt | 9 + model_upload/visual_detector/yolof/test.py | 36 +++ .../yolox/clarifai_config.yaml | 90 +++++++ .../yolox/configs/_base_/default_runtime.py | 24 ++ .../configs/_base_/schedules/schedule_1x.py | 28 ++ .../configs/yolox/yolox_l_8xb8-300e_coco.py | 8 + .../configs/yolox/yolox_m_8xb8-300e_coco.py | 8 + .../yolox/yolox_nano_8xb8-300e_coco.py | 11 + .../configs/yolox/yolox_s_8xb8-300e_coco.py | 236 +++++++++++++++++ .../yolox/yolox_tiny_8xb8-300e_coco.py | 61 +++++ .../configs/yolox/yolox_x_8xb8-300e_coco.py | 8 + .../visual_detector/yolox/inference.py | 61 +++++ .../visual_detector/yolox/requirements.txt | 9 + model_upload/visual_detector/yolox/test.py | 36 +++ model_upload/visual_embedder/README.md | 36 +++ .../vit-base/clarifai_config.yaml | 37 +++ .../visual_embedder/vit-base/inference.py | 46 ++++ .../visual_embedder/vit-base/requirements.txt | 5 + model_upload/visual_embedder/vit-base/test.py | 40 +++ model_upload/visual_segmenter/README.md | 36 +++ .../segformer-b2/clarifai_config.yaml | 28 ++ .../segformer-b2/inference.py | 49 ++++ .../segformer-b2/requirements.txt | 5 + .../visual_segmenter/segformer-b2/test.py | 40 +++ models/model_upload/README.md | 2 - 70 files changed, 
2834 insertions(+), 4 deletions(-) create mode 100644 model_upload/README.md create mode 100644 model_upload/multimodal_embedder/README.md create mode 100644 model_upload/multimodal_embedder/clip/clarifai_config.yaml create mode 100644 model_upload/multimodal_embedder/clip/inference.py create mode 100644 model_upload/multimodal_embedder/clip/requirements.txt create mode 100644 model_upload/multimodal_embedder/clip/test.py create mode 100644 model_upload/text_classifier/README.md create mode 100644 model_upload/text_classifier/xlm-roberta/clarifai_config.yaml create mode 100644 model_upload/text_classifier/xlm-roberta/inference.py create mode 100644 model_upload/text_classifier/xlm-roberta/requirements.txt create mode 100644 model_upload/text_classifier/xlm-roberta/test.py create mode 100644 model_upload/text_embedder/README.md create mode 100644 model_upload/text_embedder/instructor-xl/clarifai_config.yaml create mode 100644 model_upload/text_embedder/instructor-xl/inference.py create mode 100644 model_upload/text_embedder/instructor-xl/requirements.txt create mode 100644 model_upload/text_embedder/instructor-xl/test.py create mode 100644 model_upload/text_to_image/README.md create mode 100644 model_upload/text_to_image/sd-v1.5/clarifai_config.yaml create mode 100644 model_upload/text_to_image/sd-v1.5/inference.py create mode 100644 model_upload/text_to_image/sd-v1.5/requirements.txt create mode 100644 model_upload/text_to_image/sd-v1.5/test.py create mode 100644 model_upload/text_to_text/README.md create mode 100644 model_upload/text_to_text/hf-model/clarifai_config.yaml create mode 100644 model_upload/text_to_text/hf-model/inference.py create mode 100644 model_upload/text_to_text/hf-model/requirements.txt create mode 100644 model_upload/text_to_text/hf-model/test.py create mode 100644 model_upload/text_to_text/vllm-model/clarifai_config.yaml create mode 100644 model_upload/text_to_text/vllm-model/inference.py create mode 100644 model_upload/text_to_text/vllm-model/requirements.txt create mode 100644 model_upload/text_to_text/vllm-model/test.py create mode 100644 model_upload/visual_classsifier/README.md create mode 100644 model_upload/visual_classsifier/age_vit/clarifai_config.yaml create mode 100644 model_upload/visual_classsifier/age_vit/inference.py create mode 100644 model_upload/visual_classsifier/age_vit/requirements.txt create mode 100644 model_upload/visual_classsifier/age_vit/test.py create mode 100644 model_upload/visual_detector/Readme.md create mode 100644 model_upload/visual_detector/faster-rcnn_torchserve/clarifai_config.yaml create mode 100644 model_upload/visual_detector/faster-rcnn_torchserve/inference.py create mode 100644 model_upload/visual_detector/faster-rcnn_torchserve/model_store/hub/checkpoints/keep create mode 100644 model_upload/visual_detector/faster-rcnn_torchserve/requirements.txt create mode 100644 model_upload/visual_detector/faster-rcnn_torchserve/test.py create mode 100644 model_upload/visual_detector/yolof/clarifai_config.yaml create mode 100644 model_upload/visual_detector/yolof/config/yolof_r50_c5_8x8_1x_coco.py create mode 100644 model_upload/visual_detector/yolof/inference.py create mode 100644 model_upload/visual_detector/yolof/requirements.txt create mode 100644 model_upload/visual_detector/yolof/test.py create mode 100644 model_upload/visual_detector/yolox/clarifai_config.yaml create mode 100644 model_upload/visual_detector/yolox/configs/_base_/default_runtime.py create mode 100644 
model_upload/visual_detector/yolox/configs/_base_/schedules/schedule_1x.py create mode 100644 model_upload/visual_detector/yolox/configs/yolox/yolox_l_8xb8-300e_coco.py create mode 100644 model_upload/visual_detector/yolox/configs/yolox/yolox_m_8xb8-300e_coco.py create mode 100644 model_upload/visual_detector/yolox/configs/yolox/yolox_nano_8xb8-300e_coco.py create mode 100644 model_upload/visual_detector/yolox/configs/yolox/yolox_s_8xb8-300e_coco.py create mode 100644 model_upload/visual_detector/yolox/configs/yolox/yolox_tiny_8xb8-300e_coco.py create mode 100644 model_upload/visual_detector/yolox/configs/yolox/yolox_x_8xb8-300e_coco.py create mode 100644 model_upload/visual_detector/yolox/inference.py create mode 100644 model_upload/visual_detector/yolox/requirements.txt create mode 100644 model_upload/visual_detector/yolox/test.py create mode 100644 model_upload/visual_embedder/README.md create mode 100644 model_upload/visual_embedder/vit-base/clarifai_config.yaml create mode 100644 model_upload/visual_embedder/vit-base/inference.py create mode 100644 model_upload/visual_embedder/vit-base/requirements.txt create mode 100644 model_upload/visual_embedder/vit-base/test.py create mode 100644 model_upload/visual_segmenter/README.md create mode 100644 model_upload/visual_segmenter/segformer-b2/clarifai_config.yaml create mode 100644 model_upload/visual_segmenter/segformer-b2/inference.py create mode 100644 model_upload/visual_segmenter/segformer-b2/requirements.txt create mode 100644 model_upload/visual_segmenter/segformer-b2/test.py diff --git a/README.md b/README.md index e54c374..db62d8b 100644 --- a/README.md +++ b/README.md @@ -82,8 +82,20 @@ dataset.upload_dataset(task="text_clf", split="train", module_dir="path_to_imdb_ ## Model upload examples - -There is an exciting new model upload experience now in private preview. We'd love for you to try it out and give us feedback! If you're interested, please sign up for private preview [here](https://forms.gle/MSx7QNxmug2oFZYD6). 
+### How to start
+Please refer to this [doc](https://github.com/Clarifai/clarifai-python/tree/master/clarifai/models/model_serving)
+### Examples
+| Model type | Example |
+| ----------- | ----------- |
+| [multimodal-embedder](./model_upload/multimodal_embedder/) | [CLIP](./model_upload/multimodal_embedder/clip/) |
+| [text-classifier](./model_upload/text_classifier) | [xlm-roberta](./model_upload/text_classifier/xlm-roberta/) |
+| [text-embedder](./model_upload/text_embedder/) | [instructor-xl](./model_upload/text_embedder/instructor-xl/) |
+| [text-to-image](./model_upload/text_to_image/) | [sd-v1.5](./model_upload/text_to_image/sd-v1.5/) |
+| [text-to-text](./model_upload/text_to_text/) | [bart-summarize](./model_upload/text_to_text/bart-summarize/), [vllm model](./model_upload/vllm_text_to_text/example/) |
+| [visual_classsifier](./model_upload/visual_classsifier/) | [age_vit](./model_upload/visual_classsifier/age_vit/) |
+| [visual_detector](./model_upload/visual_detector/) | [yolof](./model_upload/visual_detector/yolof/), [faster-rcnn_torchserve](./model_upload/visual_detector/faster-rcnn_torchserve/) |
+| [visual_embedder](./model_upload/visual_embedder) | [vit-base](./model_upload/visual_embedder/vit-base/) |
+| [visual_segmenter](./model_upload/visual_segmenter) | [segformer-b2](./model_upload/visual_segmenter/segformer-b2/) |
 ## Note
diff --git a/model_upload/README.md b/model_upload/README.md
new file mode 100644
index 0000000..21a4854
--- /dev/null
+++ b/model_upload/README.md
@@ -0,0 +1,19 @@
+## Clarifai Model Upload Examples
+
+A collection of pre-built models for different tasks. To run inference locally with any of the examples here, some models require additional files, such as checkpoints, to be downloaded before testing and/or deployment to Clarifai, as they are omitted here due to GitHub file size limits.
+
+See the README under each model for detailed instructions on how to download the additional files and deploy the models.
+
+## Prerequisites
+
+Install the latest `clarifai`
+
+```bash
+pip install --upgrade clarifai
+```
+
+Log in to the platform to upload models
+
+```
+clarifai login
+```
diff --git a/model_upload/multimodal_embedder/README.md b/model_upload/multimodal_embedder/README.md
new file mode 100644
index 0000000..b297c3e
--- /dev/null
+++ b/model_upload/multimodal_embedder/README.md
@@ -0,0 +1,39 @@
+## Multimodal Embedder Model Examples
+
+These can be used on the fly with minimal or no changes to test deploy multimodal embedding models to the Clarifai platform. See the required files section for each model below.
+
+* ### [CLIP](./clip/)
+
+  Required files to run tests locally:
+
+  Download the [model checkpoint from huggingface](https://huggingface.co/openai/clip-vit-base-patch32) and store it under `clip/checkpoint/`
+
+  ```
+  $ pip install huggingface-hub
+  $ huggingface-cli download openai/clip-vit-base-patch32 --local-dir clip/checkpoint/ --local-dir-use-symlinks False --exclude *.msgpack *.h5
+  ```
+
+  Install dependencies to test locally
+
+  ```bash
+  $ pip install -r clip/requirements.txt
+  ```
+
+  Deploy the model to Clarifai:
+
+  >Note: set `--no-test` flag for `build` and `upload` command to disable testing
+
+  1. Build
+
+  ```bash
+  $ clarifai build model ./clip
+  ```
+
+  upload `*.clarifai` file to storage to obtain direct download url
+
+  2.
Upload + + ```bash + $ clarifai upload model ./clip --url + ``` diff --git a/model_upload/multimodal_embedder/clip/clarifai_config.yaml b/model_upload/multimodal_embedder/clip/clarifai_config.yaml new file mode 100644 index 0000000..776ece9 --- /dev/null +++ b/model_upload/multimodal_embedder/clip/clarifai_config.yaml @@ -0,0 +1,31 @@ +# Sample config of inference_parameters and labels +# For detail, please refer to docs +# -------------------- +# inference_parameters: +# - path: boolean_var +# default_value: true +# field_type: 1 +# description: a boolean variable +# - path: string_var +# default_value: "a string" +# field_type: 2 +# description: a string variable +# - path: number_var +# default_value: 1 +# field_type: 3 +# description: a number variable +# - path: secret_string_var +# default_value: "YOUR_SECRET" +# field_type: 21 +# description: a string variable contains secret like API key + +clarifai_model: + clarifai_model_id: '' + clarifai_user_app_id: '' + description: '' + inference_parameters: [] + labels: [] + type: multimodal-embedder +serving_backend: + triton: + max_batch_size: 4 diff --git a/model_upload/multimodal_embedder/clip/inference.py b/model_upload/multimodal_embedder/clip/inference.py new file mode 100644 index 0000000..b806539 --- /dev/null +++ b/model_upload/multimodal_embedder/clip/inference.py @@ -0,0 +1,62 @@ +# User model inference script. + +import os +from pathlib import Path +from typing import Dict, Union +from clarifai.models.model_serving.model_config import * # noqa +import torch +from transformers import CLIPModel, CLIPProcessor + +class InferenceModel(MultiModalEmbedder): + """User model inference class.""" + + def __init__(self) -> None: + """ + Load inference time artifacts that are called frequently .e.g. models, tokenizers, etc. + in this method so they are loaded only once for faster inference. + """ + # current directory + self.base_path: Path = os.path.dirname(__file__) + # local checkpoint for openai/clip-vit-base-patch32 + self.model = CLIPModel.from_pretrained(os.path.join(self.base_path, "checkpoint")) + self.model.eval() + self.processor = CLIPProcessor.from_pretrained(os.path.join(self.base_path, "checkpoint")) + + def predict(self, input_data: list, + inference_parameters: Dict[str, Union[str, float, int, bool]] = {}) -> list: + """ Custom prediction function for `multimodal-embedder` model. 
+ + Args: + input_data (List[_MultiModalInputTypeDict]): List of dict of key-value: `image`(np.ndarray) and `text` (str) + inference_parameters (Dict[str, Union[str, float, int, bool]]): your inference parameters + + Returns: + list of EmbeddingOutput + + """ + + outputs = [] + for inp in input_data: + image, text = inp.get("image", None), inp.get("text", None) + with torch.no_grad(): + inputs = self.processor(text=text, images=image, return_tensors="pt", padding=True) + if text is not None: + inputs = self.processor(text=text, return_tensors="pt", padding=True) + embeddings = self.model.get_text_features(**inputs) + else: + inputs = self.processor(images=image, return_tensors="pt", padding=True) + embeddings = self.model.get_image_features(**inputs) + embeddings = embeddings.squeeze().cpu().numpy() + outputs.append(EmbeddingOutput(embedding_vector=embeddings)) + + return outputs + + +if __name__ == "__main__": + + # Dummy test + model = InferenceModel() + input = dict(text="Hi") + + output = model.predict([input]) + print(output[0]) \ No newline at end of file diff --git a/model_upload/multimodal_embedder/clip/requirements.txt b/model_upload/multimodal_embedder/clip/requirements.txt new file mode 100644 index 0000000..9e0a61e --- /dev/null +++ b/model_upload/multimodal_embedder/clip/requirements.txt @@ -0,0 +1,4 @@ +clarifai +tritonclient[all] +transformers==4.38.0 +torch==2.1.1 diff --git a/model_upload/multimodal_embedder/clip/test.py b/model_upload/multimodal_embedder/clip/test.py new file mode 100644 index 0000000..3e90821 --- /dev/null +++ b/model_upload/multimodal_embedder/clip/test.py @@ -0,0 +1,40 @@ +import unittest + +from clarifai.models.model_serving.repo_build import BaseTest + + +class CustomTest(unittest.TestCase): + """ + BaseTest loads the InferenceModel from the inference.py file in the current working directory. + To execute the predict method of the InferenceModel, use the predict method in BaseTest. + It takes the exact same inputs and inference parameters, returning the same outputs as InferenceModel.predict. + The difference is that BaseTest.predict verifies your_infer_parameters against config.clarifai_models.inference_parameters and checks the output values. + + For example, test input value of visual-classifier + + def test_input(self): + import cv2 + path = "path/to/image" + img = cv2.imread(path) + outputs = self.model.predict([img], infer_param1=..., infer_param2=...) + print(outputs) + assert outputs + + """ + + def setUp(self) -> None: + your_infer_parameter = dict( + ) # for example dict(float_var=0.12, string_var="test", _secret_string_var="secret") + self.model = BaseTest(your_infer_parameter) + + def test_default_cases(self): + """Test your model with dummy inputs. + In general, you only need to run this test to check your InferneceModel implementation. + In case the default inputs makes your model failed for some reason (not because of assert in `test_with_default_inputs`), + you can comment out this test. + """ + self.model.test_with_default_inputs() + + def test_specific_case1(self): + """ Implement your test case""" + pass diff --git a/model_upload/text_classifier/README.md b/model_upload/text_classifier/README.md new file mode 100644 index 0000000..4b025a9 --- /dev/null +++ b/model_upload/text_classifier/README.md @@ -0,0 +1,39 @@ +## Text Classification Model Examples + +These can be used on the fly with minimal or no changes to test deploy text classification models to the Clarifai platform. See the required files section for each model below. 
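+
+As with the other examples in this repo, each model folder ships a plain `unittest`-based `test.py`; a minimal sketch of driving it programmatically (assuming you run it from inside the model folder, e.g. `xlm-roberta/`, with its requirements installed and the checkpoint downloaded) could look like this:
+
+```python
+import unittest
+
+# test.py defines CustomTest(unittest.TestCase); standard discovery picks it up.
+# Run from the model directory, since BaseTest loads inference.py from the
+# current working directory (see the docstring in test.py).
+suite = unittest.defaultTestLoader.discover(".", pattern="test.py")
+unittest.TextTestRunner(verbosity=2).run(suite)
+```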
+ +* ### [XLM-Roberta Tweet Sentiment Classifier](./xlm-roberta/) + + Required files to run tests locally: + + Download the [model checkpoint](https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment/tree/main) and store it under `xlm-roberta/checkpoint/` + + ``` + $ pip install huggingface-hub + $ huggingface-cli download cardiffnlp/twitter-xlm-roberta-base-sentiment --local-dir xlm-roberta/checkpoint/ --local-dir-use-symlinks False --exclude *.msgpack *.h5 + ``` + + Install dependecies to test locally + + ```bash + $ pip install -r xlm-roberta/requirements.txt + ``` + + Deploy the model to Clarifai: + + >Note: set `--no-test` flag for `build` and `upload` command to disable testing + + 1. Build + + ```bash + $ clarifai build model ./xlm-roberta + ``` + + upload `*.clarifai` file to storage to obtain direct download url + + 2. Upload + + ```bash + $ clarifai upload model ./xlm-roberta --url + ``` + diff --git a/model_upload/text_classifier/xlm-roberta/clarifai_config.yaml b/model_upload/text_classifier/xlm-roberta/clarifai_config.yaml new file mode 100644 index 0000000..274a1c5 --- /dev/null +++ b/model_upload/text_classifier/xlm-roberta/clarifai_config.yaml @@ -0,0 +1,13 @@ +clarifai_model: + clarifai_model_id: '' + clarifai_user_app_id: '' + description: '' + inference_parameters: [] + labels: + - Negative + - Neutral + - Positive + type: text-classifier +serving_backend: + triton: + max_batch_size: 4 diff --git a/model_upload/text_classifier/xlm-roberta/inference.py b/model_upload/text_classifier/xlm-roberta/inference.py new file mode 100644 index 0000000..42fac1a --- /dev/null +++ b/model_upload/text_classifier/xlm-roberta/inference.py @@ -0,0 +1,56 @@ +# User model inference script. + +import os +from pathlib import Path +from typing import Dict, Union +from clarifai.models.model_serving.model_config import * # noqa + +import torch +from scipy.special import softmax +from transformers import AutoModelForSequenceClassification, AutoTokenizer + +class InferenceModel(TextClassifier): + """User model inference class.""" + + def __init__(self) -> None: + """ + Load inference time artifacts that are called frequently .e.g. models, tokenizers, etc. + in this method so they are loaded only once for faster inference. + """ + # current directory + self.base_path: Path = os.path.dirname(__file__) + self.checkpoint_path: Path = os.path.join(self.base_path, "checkpoint") + self.model = AutoModelForSequenceClassification.from_pretrained(self.checkpoint_path) + self.tokenizer = AutoTokenizer.from_pretrained(self.checkpoint_path) + + def predict(self, input_data: list, + inference_parameters: Dict[str, Union[str, float, int]]) -> list: + """ Custom prediction function for `text-classifier` model. + + Args: + input_data (List[str]): List of text + inference_parameters (Dict[str, Union[str, float, int]]): your inference parameters + + Returns: + list of ClassifierOutput + + """ + + outputs = [] + for inp in input_data: + encoded_input = self.tokenizer(inp, return_tensors='pt') + output = self.model(**encoded_input) + scores = output[0][0].detach().numpy() + scores = softmax(scores) + outputs.append(ClassifierOutput(predicted_scores=scores)) + + return outputs + +if __name__ == "__main__": + + # Dummy test + model = InferenceModel() + input = "How are you today?" 
+ + output = model.predict([input]) + print(output[0]) \ No newline at end of file diff --git a/model_upload/text_classifier/xlm-roberta/requirements.txt b/model_upload/text_classifier/xlm-roberta/requirements.txt new file mode 100644 index 0000000..1931929 --- /dev/null +++ b/model_upload/text_classifier/xlm-roberta/requirements.txt @@ -0,0 +1,7 @@ +clarifai +tritonclient[all] +torch==1.13.1 +transformers==4.38.0 +scipy==1.10.1 +sentencepiece==0.1.99 +protobuf<4.21.3 diff --git a/model_upload/text_classifier/xlm-roberta/test.py b/model_upload/text_classifier/xlm-roberta/test.py new file mode 100644 index 0000000..3e90821 --- /dev/null +++ b/model_upload/text_classifier/xlm-roberta/test.py @@ -0,0 +1,40 @@ +import unittest + +from clarifai.models.model_serving.repo_build import BaseTest + + +class CustomTest(unittest.TestCase): + """ + BaseTest loads the InferenceModel from the inference.py file in the current working directory. + To execute the predict method of the InferenceModel, use the predict method in BaseTest. + It takes the exact same inputs and inference parameters, returning the same outputs as InferenceModel.predict. + The difference is that BaseTest.predict verifies your_infer_parameters against config.clarifai_models.inference_parameters and checks the output values. + + For example, test input value of visual-classifier + + def test_input(self): + import cv2 + path = "path/to/image" + img = cv2.imread(path) + outputs = self.model.predict([img], infer_param1=..., infer_param2=...) + print(outputs) + assert outputs + + """ + + def setUp(self) -> None: + your_infer_parameter = dict( + ) # for example dict(float_var=0.12, string_var="test", _secret_string_var="secret") + self.model = BaseTest(your_infer_parameter) + + def test_default_cases(self): + """Test your model with dummy inputs. + In general, you only need to run this test to check your InferneceModel implementation. + In case the default inputs makes your model failed for some reason (not because of assert in `test_with_default_inputs`), + you can comment out this test. + """ + self.model.test_with_default_inputs() + + def test_specific_case1(self): + """ Implement your test case""" + pass diff --git a/model_upload/text_embedder/README.md b/model_upload/text_embedder/README.md new file mode 100644 index 0000000..ddc0c28 --- /dev/null +++ b/model_upload/text_embedder/README.md @@ -0,0 +1,37 @@ +## Text Embedder Model Examples + +These can be used on the fly with minimal or no changes to test deploy text embedding models to the Clarifai platform. See the required files section for each model below. + +* ### [Instructor-xl](https://huggingface.co/hkunlp/instructor-xl) + + Requirements to run tests locally: + + Download/Clone the [huggingface model](https://huggingface.co/hkunlp/instructor-xl) into the **instructor-xl/checkpoint** directory. + ``` + huggingface-cli download hkunlp/instructor-xl --local-dir instructor-xl/checkpoint/sentence_transformers/hkunlp_instructor-xl --local-dir-use-symlinks False + ``` + + Install dependecies to test locally + + ```bash + $ pip install -r instructor-xl/requirements.txt + ``` + + Deploy the model to Clarifai: + + >Note: set `--no-test` flag for `build` and `upload` command to disable testing + + 1. Build + + ```bash + $ clarifai build model ./instructor-xl + ``` + + upload `*.clarifai` file to storage to obtain direct download url + + 2. 
Upload + + ```bash + $ clarifai upload model ./instructor-xl --url + ``` + diff --git a/model_upload/text_embedder/instructor-xl/clarifai_config.yaml b/model_upload/text_embedder/instructor-xl/clarifai_config.yaml new file mode 100644 index 0000000..5fff454 --- /dev/null +++ b/model_upload/text_embedder/instructor-xl/clarifai_config.yaml @@ -0,0 +1,10 @@ +clarifai_model: + clarifai_model_id: '' + clarifai_user_app_id: '' + description: '' + inference_parameters: [] + labels: [] + type: text-embedder +serving_backend: + triton: + max_batch_size: 4 diff --git a/model_upload/text_embedder/instructor-xl/inference.py b/model_upload/text_embedder/instructor-xl/inference.py new file mode 100644 index 0000000..d697503 --- /dev/null +++ b/model_upload/text_embedder/instructor-xl/inference.py @@ -0,0 +1,57 @@ +# User model inference script. + +import os +from pathlib import Path +from typing import Dict, Union + +# Set up env for huggingface +ROOT_PATH = os.path.join(os.path.dirname(__file__)) +PIPELINE_PATH = os.path.join(ROOT_PATH, 'checkpoint') + +os.environ['TORCH_HOME'] = PIPELINE_PATH +os.environ['TRANSFORMERS_CACHE'] = PIPELINE_PATH # noqa +#os.environ["TRANSFORMERS_OFFLINE"] = "1" # noqa + +import torch # noqa +from InstructorEmbedding import INSTRUCTOR # noqa +from clarifai.models.model_serving.model_config import * # noqa + + +class InferenceModel(TextEmbedder): + """User model inference class.""" + + def __init__(self) -> None: + """ + Load inference time artifacts that are called frequently .e.g. models, tokenizers, etc. + in this method so they are loaded only once for faster inference. + """ + # current directory + self.base_path: Path = os.path.dirname(__file__) + self.device = "cuda:0" if torch.cuda.is_available() else "cpu" + self.model = INSTRUCTOR('hkunlp/instructor-xl') + + def predict(self, input_data: list, + inference_parameters: Dict[str, Union[str, float, int]]) -> list: + """ Custom prediction function for `text-embedder` model. + + Args: + input_data (List[str]): List of text + inference_parameters (Dict[str, Union[str, float, int]]): your inference parameters + + Returns: + list of EmbeddingOutput + + """ + + batch_preds = self.model.encode(input_data, device=self.device) + + return [EmbeddingOutput(each) for each in batch_preds] + +if __name__ == "__main__": + + # Dummy test + model = InferenceModel() + input = "How are you today?" + + output = model.predict([input]) + print(output[0]) diff --git a/model_upload/text_embedder/instructor-xl/requirements.txt b/model_upload/text_embedder/instructor-xl/requirements.txt new file mode 100644 index 0000000..759c866 --- /dev/null +++ b/model_upload/text_embedder/instructor-xl/requirements.txt @@ -0,0 +1,7 @@ +clarifai +tritonclient[all] +torch==1.13.1 +scipy==1.10.1 +einops==0.6.1 +InstructorEmbedding==1.0.1 +sentence_transformers==2.2.2 diff --git a/model_upload/text_embedder/instructor-xl/test.py b/model_upload/text_embedder/instructor-xl/test.py new file mode 100644 index 0000000..3e90821 --- /dev/null +++ b/model_upload/text_embedder/instructor-xl/test.py @@ -0,0 +1,40 @@ +import unittest + +from clarifai.models.model_serving.repo_build import BaseTest + + +class CustomTest(unittest.TestCase): + """ + BaseTest loads the InferenceModel from the inference.py file in the current working directory. + To execute the predict method of the InferenceModel, use the predict method in BaseTest. + It takes the exact same inputs and inference parameters, returning the same outputs as InferenceModel.predict. 
+ The difference is that BaseTest.predict verifies your_infer_parameters against config.clarifai_models.inference_parameters and checks the output values. + + For example, test input value of visual-classifier + + def test_input(self): + import cv2 + path = "path/to/image" + img = cv2.imread(path) + outputs = self.model.predict([img], infer_param1=..., infer_param2=...) + print(outputs) + assert outputs + + """ + + def setUp(self) -> None: + your_infer_parameter = dict( + ) # for example dict(float_var=0.12, string_var="test", _secret_string_var="secret") + self.model = BaseTest(your_infer_parameter) + + def test_default_cases(self): + """Test your model with dummy inputs. + In general, you only need to run this test to check your InferneceModel implementation. + In case the default inputs makes your model failed for some reason (not because of assert in `test_with_default_inputs`), + you can comment out this test. + """ + self.model.test_with_default_inputs() + + def test_specific_case1(self): + """ Implement your test case""" + pass diff --git a/model_upload/text_to_image/README.md b/model_upload/text_to_image/README.md new file mode 100644 index 0000000..c16fc7c --- /dev/null +++ b/model_upload/text_to_image/README.md @@ -0,0 +1,36 @@ +## Text to Image Model Examples + +These can be used on the fly with minimal or no changes to test deploy text to image models to the Clarifai platform. See the required files section for each model below. + +* ### [sd-v1.5 (Stable-Diffusion-v1.5)](./sd-v1.5/) + + Download the [model checkpoint](https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main) and store it under `sd-v1.5/checkpoint` + + ```bash + $ pip install huggingface-hub + $ huggingface-cli download runwayml/stable-diffusion-v1-5 --local-dir sd-v1.5/checkpoint --local-dir-use-symlinks False --exclude *.safetensors *.non_ema.bin *.fp16.bin *.ckpt + ``` + + Install dependecies to test locally + + ```bash + $ pip install -r sd-v1.5/requirements.txt + ``` + + Deploy the model to Clarifai: + + >Note: set `--no-test` flag for `build` and `upload` command to disable testing + + 1. Build + + ```bash + $ clarifai build model ./sd-v1.5 + ``` + + upload `*.clarifai` file to storage to obtain direct download url + + 2. Upload + + ```bash + $ clarifai upload model ./sd-v1.5 --url + ``` diff --git a/model_upload/text_to_image/sd-v1.5/clarifai_config.yaml b/model_upload/text_to_image/sd-v1.5/clarifai_config.yaml new file mode 100644 index 0000000..01197b5 --- /dev/null +++ b/model_upload/text_to_image/sd-v1.5/clarifai_config.yaml @@ -0,0 +1,22 @@ +clarifai_model: + clarifai_model_id: '' + clarifai_user_app_id: '' + description: '' + inference_parameters: + - default_value: 30 + description: num_inference_steps + field_type: 3 + path: num_inference_steps + - default_value: 7.5 + description: guidance_scale + field_type: 3 + path: guidance_scale + - default_value: low quality + description: negative_prompt + field_type: 2 + path: negative_prompt + labels: [] + type: text-to-image +serving_backend: + triton: + max_batch_size: 4 diff --git a/model_upload/text_to_image/sd-v1.5/inference.py b/model_upload/text_to_image/sd-v1.5/inference.py new file mode 100644 index 0000000..eacb5dc --- /dev/null +++ b/model_upload/text_to_image/sd-v1.5/inference.py @@ -0,0 +1,62 @@ +# User model inference script. 
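+# Text-to-image wrapper: loads the Stable Diffusion v1.5 pipeline from the local ./checkpoint directory in float16
+# and returns one ImageOutput per input prompt; num_inference_steps, guidance_scale and negative_prompt are read
+# from inference_parameters (see clarifai_config.yaml above).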
+ +import os +from pathlib import Path +from typing import Dict, Union + +import numpy as np +import torch +from diffusers import StableDiffusionPipeline + +from clarifai.models.model_serving.model_config import * # noqa + + +class InferenceModel(TextToImage): + """User model inference class.""" + + def __init__(self) -> None: + """ + Load inference time artifacts that are called frequently .e.g. models, tokenizers, etc. + in this method so they are loaded only once for faster inference. + """ + # current directory + self.base_path: Path = os.path.dirname(__file__) + self.huggingface_model_path = os.path.join(self.base_path, "checkpoint") + self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.pipeline = StableDiffusionPipeline.from_pretrained( + self.huggingface_model_path, torch_dtype=torch.float16) + self.pipeline = self.pipeline.to(self.device) + + def predict(self, input_data: list, + inference_parameters: Dict[str, Union[str, float, int]]) -> list: + """ Custom prediction function for `text-to-image` model. + + Args: + input_data (List[str]): List of text + inference_parameters (Dict[str, Union[str, float, int]]): your inference parameters + + Returns: + list of ImageOutput + + """ + + outputs = [] + num_inference_steps = int(inference_parameters.pop("num_inference_steps", 50)) + for inp in input_data: + out_image = self.pipeline( + inp, num_inference_steps=num_inference_steps, **inference_parameters).images[0] + out_image = np.asarray(out_image) + outputs.append(ImageOutput(image=out_image)) + + return outputs + +if __name__ == "__main__": + + # Dummy test + from PIL import Image + + model = InferenceModel() + input = "A cat" + output = model.predict([input], inference_parameters=dict(num_inference_steps=30)) + + Image.fromarray(output[0].image).save("tmp.jpg") \ No newline at end of file diff --git a/model_upload/text_to_image/sd-v1.5/requirements.txt b/model_upload/text_to_image/sd-v1.5/requirements.txt new file mode 100644 index 0000000..8a2943c --- /dev/null +++ b/model_upload/text_to_image/sd-v1.5/requirements.txt @@ -0,0 +1,6 @@ +clarifai +tritonclient[all] +torch==1.13.1 +transformers==4.38.0 +Pillow==10.3.0 +diffusers==0.19.0 diff --git a/model_upload/text_to_image/sd-v1.5/test.py b/model_upload/text_to_image/sd-v1.5/test.py new file mode 100644 index 0000000..6bfdf25 --- /dev/null +++ b/model_upload/text_to_image/sd-v1.5/test.py @@ -0,0 +1,47 @@ +import unittest + +from clarifai.models.model_serving.repo_build import BaseTest + + +class CustomTest(unittest.TestCase): + """ + BaseTest loads the InferenceModel from the inference.py file in the current working directory. + To execute the predict method of the InferenceModel, use the predict method in BaseTest. + It takes the exact same inputs and inference parameters, returning the same outputs as InferenceModel.predict. + The difference is that BaseTest.predict verifies your_infer_parameters against config.clarifai_models.inference_parameters and checks the output values. + + For example, test input value of visual-classifier + + def test_input(self): + import cv2 + path = "path/to/image" + img = cv2.imread(path) + outputs = self.model.predict([img], infer_param1=..., infer_param2=...) + print(outputs) + assert outputs + + """ + + def setUp(self) -> None: + your_infer_parameter = dict( + num_inference_steps = 30, + guidance_scale = 7.5, + negative_prompt="low quality" + ) + self.model = BaseTest(your_infer_parameter) + + def test_default_cases(self): + """Test your model with dummy inputs. 
+    In general, you only need to run this test to check your InferenceModel implementation.
+    In case the default inputs make your model fail for some reason (not because of an assert in `test_with_default_inputs`),
+    you can comment out this test.
+    """
+    self.model.test_with_default_inputs()
+
+  def test_specific_case1(self):
+    """ Implement your test case"""
+    from PIL import Image
+    input = "A cat meme"
+    output = self.model.predict([input], num_inference_steps=30.0, negative_prompt="low quality, ugly")
+
+    Image.fromarray(output[0].image).save("tmp.jpg")
diff --git a/model_upload/text_to_text/README.md b/model_upload/text_to_text/README.md
new file mode 100644
index 0000000..81445ca
--- /dev/null
+++ b/model_upload/text_to_text/README.md
@@ -0,0 +1,65 @@
+## Text-to-Text Model Examples
+
+These can be used on the fly with minimal or no changes to test deploy any model that takes a text input and yields a text output prediction, e.g. text generation, summarization and translation models, to the Clarifai platform. See the required files section for each model below.
+
+### [hf-model](./hf-model/)
+
+The `hf-model` folder contains code to deploy any Hugging Face `text-generation` model.
+
+For instance, suppose you want to deploy [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf).
+
+First of all, download/clone the checkpoint and store it under the **hf-model/checkpoint** directory.
+
+```bash
+$ pip install huggingface-hub
+$ huggingface-cli download meta-llama/Llama-2-7b-chat-hf --local-dir hf-model/checkpoint --local-dir-use-symlinks False --exclude {EXCLUDED FILE TYPES}
+```
+
+Install dependencies to test locally:
+
+```bash
+$ pip install -r hf-model/requirements.txt
+```
+
+>Note: Package versions may vary for certain models.
+
+### [vLLM](./vllm-model/)
+
+The `vllm-model` folder contains code to deploy a `text-generation` model using the high-performance `vLLM` framework.
+
+#### Prerequisites
+
+For instance, suppose you want to deploy [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) using vLLM.
+
+First of all, download/clone the checkpoint and store it under the **vllm-model/weights/** directory.
+
+```bash
+$ pip install huggingface-hub
+$ huggingface-cli download meta-llama/Llama-2-7b-chat-hf --local-dir vllm-model/weights/ --local-dir-use-symlinks False --exclude {EXCLUDED FILE TYPES}
+```
+
+Then, in `inference.py`, you may want to adjust some of the vLLM parameters passed to `LLM()`. It is recommended to use `gpu_memory_utilization=0.7`.
+
+Install dependencies to test locally:
+
+```bash
+$ pip install -r vllm-model/requirements.txt
+```
+
+## Finally, deploy the model to Clarifai
+
+>Note: you can skip testing by setting `--no-test` flag for `build` and `upload` command
+
+1. Build
+
+```bash
+$ clarifai build model
+```
+
+upload `*.clarifai` file to storage to obtain direct download url
+
+2.
Upload + +```bash +$ clarifai upload model --url +``` diff --git a/model_upload/text_to_text/hf-model/clarifai_config.yaml b/model_upload/text_to_text/hf-model/clarifai_config.yaml new file mode 100644 index 0000000..6b17faa --- /dev/null +++ b/model_upload/text_to_text/hf-model/clarifai_config.yaml @@ -0,0 +1,30 @@ +clarifai_model: + clarifai_model_id: '' + clarifai_user_app_id: '' + description: '' + inference_parameters: + - default_value: 256 + description: max_new_tokens + field_type: 3 + path: max_new_tokens + - default_value: 0.9 + description: temperature + field_type: 3 + path: temperature + - default_value: 0.9 + description: top_p + field_type: 3 + path: top_p + - default_value: false + description: do_sample + field_type: 1 + path: do_sample + - default_value: 50 + description: top_k + field_type: 3 + path: top_k + labels: [] + type: text-to-text +serving_backend: + triton: + max_batch_size: 4 diff --git a/model_upload/text_to_text/hf-model/inference.py b/model_upload/text_to_text/hf-model/inference.py new file mode 100644 index 0000000..c15d26a --- /dev/null +++ b/model_upload/text_to_text/hf-model/inference.py @@ -0,0 +1,74 @@ +# User model inference script. + +import os +from pathlib import Path +from typing import Dict, Union + +import torch +import transformers +from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig + +from clarifai.models.model_serving.model_config import * # noqa + + +class InferenceModel(TextToText): + """User model inference class.""" + + def __init__(self) -> None: + """ + Load inference time artifacts that are called frequently .e.g. models, tokenizers, etc. + in this method so they are loaded only once for faster inference. + """ + # current directory + self.base_path: Path = os.path.dirname(__file__) + # where you save hf checkpoint in your working dir e.i. `your_model` + model_path = os.path.join(self.base_path, "checkpoint") + self.tokenizer = AutoTokenizer.from_pretrained(model_path) + + nf4_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_use_double_quant=True, + bnb_4bit_compute_dtype=torch.bfloat16 + ) + + model = AutoModelForCausalLM.from_pretrained( + model_path, + # uncomment to use 4bit + #quantization_config =nf4_config, + torch_dtype=torch.bfloat16, + trust_remote_code=True, + device_map="auto" + ) + model.eval() + + self.pipeline = transformers.pipeline( + "text-generation", + model=model, + tokenizer=self.tokenizer, + torch_dtype=torch.float16, + device_map="auto", + ) + + def predict(self, input_data: list, + inference_parameters: Dict[str, Union[str, float, int]]) -> list: + """ Custom prediction function for `text-to-text` (also called as `text generation`) model. 
+ + Args: + input_data (List[str]): List of text + inference_parameters (Dict[str, Union[str, float, int]]): your inference parameters + + Returns: + list of TextOutput + + """ + top_k = int(inference_parameters.pop("top_k", 50)) + + output_sequences = self.pipeline( + input_data, + eos_token_id=self.tokenizer.eos_token_id, + top_k=top_k, + **inference_parameters) + + # wrap outputs in Clarifai defined output + return [TextOutput(each[0]) for each in output_sequences] diff --git a/model_upload/text_to_text/hf-model/requirements.txt b/model_upload/text_to_text/hf-model/requirements.txt new file mode 100644 index 0000000..aa901cf --- /dev/null +++ b/model_upload/text_to_text/hf-model/requirements.txt @@ -0,0 +1,11 @@ +clarifai +tritonclient[all] +torch==2.1.1 +accelerate==0.20.3 +bitsandbytes==0.41.0 +transformers==4.38.0 +scipy==1.10.1 +einops==0.6.1 +xformers==0.0.20 +sentence_transformers==2.2.0 +protobuf==3.20.2 diff --git a/model_upload/text_to_text/hf-model/test.py b/model_upload/text_to_text/hf-model/test.py new file mode 100644 index 0000000..290e798 --- /dev/null +++ b/model_upload/text_to_text/hf-model/test.py @@ -0,0 +1,47 @@ +import unittest + +from clarifai.models.model_serving.repo_build import BaseTest + + +class CustomTest(unittest.TestCase): + """ + BaseTest loads the InferenceModel from the inference.py file in the current working directory. + To execute the predict method of the InferenceModel, use the predict method in BaseTest. + It takes the exact same inputs and inference parameters, returning the same outputs as InferenceModel.predict. + The difference is that BaseTest.predict verifies your_infer_parameters against config.clarifai_models.inference_parameters and checks the output values. + + For example, test input value of visual-classifier + + def test_input(self): + import cv2 + path = "path/to/image" + img = cv2.imread(path) + outputs = self.model.predict([img], infer_param1=..., infer_param2=...) + print(outputs) + assert outputs + + """ + + def setUp(self) -> None: + your_infer_parameter = dict( + max_new_tokens = 256, + temperature = 0.9, + top_p = 0.9, + do_sample = False, + top_k=50, + ) + self.model = BaseTest(your_infer_parameter) + + def test_default_cases(self): + """Test your model with dummy inputs. + In general, you only need to run this test to check your InferneceModel implementation. + In case the default inputs makes your model failed for some reason (not because of assert in `test_with_default_inputs`), + you can comment out this test. 
+ """ + self.model.test_with_default_inputs() + + def test_specific_case1(self): + """ Implement your test case""" + texts = ["who is satoshi nakamoto?", "What is bitcoin?"] + out = self.model.predict(texts, max_new_tokens=200, top_p=50.0) + print(out) diff --git a/model_upload/text_to_text/vllm-model/clarifai_config.yaml b/model_upload/text_to_text/vllm-model/clarifai_config.yaml new file mode 100644 index 0000000..56775a8 --- /dev/null +++ b/model_upload/text_to_text/vllm-model/clarifai_config.yaml @@ -0,0 +1,26 @@ +clarifai_model: + clarifai_model_id: '' + clarifai_user_app_id: '' + description: '' + inference_parameters: + - default_value: 0.95 + description: top_p + field_type: 3 + path: top_p + - default_value: 50 + description: top_k + field_type: 3 + path: top_k + - default_value: 256 + description: max_tokens + field_type: 3 + path: max_tokens + - default_value: 0.8 + description: temperature + field_type: 3 + path: temperature + labels: [] + type: text-to-text +serving_backend: + triton: + max_batch_size: 4 diff --git a/model_upload/text_to_text/vllm-model/inference.py b/model_upload/text_to_text/vllm-model/inference.py new file mode 100644 index 0000000..3aaaa5f --- /dev/null +++ b/model_upload/text_to_text/vllm-model/inference.py @@ -0,0 +1,55 @@ +# User model inference script. + +import os +from pathlib import Path +from typing import Dict, Union + +from vllm import LLM, SamplingParams + +from clarifai.models.model_serving.model_config import * # noqa + + +class InferenceModel(TextToText): + """User model inference class.""" + + def __init__(self) -> None: + """ + Load inference time artifacts that are called frequently .e.g. models, tokenizers, etc. + in this method so they are loaded only once for faster inference. + """ + # current directory + self.base_path: Path = os.path.dirname(__file__) + path = os.path.join(self.base_path, "weights") + self.model = LLM( + model=path, + dtype="float16", + gpu_memory_utilization=0.9, + #quantization="awq" + ) + + + def predict(self, input_data: list, + inference_parameters: Dict[str, Union[str, float, int, bool]] = {}) -> list: + """ Custom prediction function for `text-to-text` (also called as `text generation`) model. + + Args: + input_data (List[str]): List of text + inference_parameters (Dict[str, Union[str, float, int, bool]]): your inference parameters + + Returns: + list of TextOutput + + """ + + sampling_params = SamplingParams(**inference_parameters) + preds = self.model.generate(input_data, sampling_params) + outputs = [TextOutput(each.outputs[0].text) for each in preds] + + return outputs + + +if __name__ == "__main__": + # dummy test + model = InferenceModel() + output = model.predict(["Test"]) + print(output) diff --git a/model_upload/text_to_text/vllm-model/requirements.txt b/model_upload/text_to_text/vllm-model/requirements.txt new file mode 100644 index 0000000..2e8f97d --- /dev/null +++ b/model_upload/text_to_text/vllm-model/requirements.txt @@ -0,0 +1,5 @@ +clarifai +tritonclient[all] +transformers==4.38.0 +torch==2.0.1 +vllm==0.2.1.post1 diff --git a/model_upload/text_to_text/vllm-model/test.py b/model_upload/text_to_text/vllm-model/test.py new file mode 100644 index 0000000..0615c8a --- /dev/null +++ b/model_upload/text_to_text/vllm-model/test.py @@ -0,0 +1,41 @@ +import unittest + +from clarifai.models.model_serving.repo_build import BaseTest + + +class CustomTest(unittest.TestCase): + """ + BaseTest loads the InferenceModel from the inference.py file in the current working directory. 
+ To execute the predict method of the InferenceModel, use the predict method in BaseTest. + It takes the exact same inputs and inference parameters, returning the same outputs as InferenceModel.predict. + The difference is that BaseTest.predict verifies your_infer_parameters against config.clarifai_models.inference_parameters and checks the output values. + + For example, test input value of visual-classifier + + def test_input(self): + import cv2 + path = "path/to/image" + img = cv2.imread(path) + outputs = self.model.predict([img], infer_param1=..., infer_param2=...) + print(outputs) + assert outputs + + """ + + def setUp(self) -> None: + your_infer_parameter = dict( + top_p=0.95, + top_k=50, + max_tokens=256, + temperature=0.8 + ) # for example dict(float_var=0.12, string_var="test", _secret_string_var="secret") + self.model = BaseTest(your_infer_parameter) + + def test_default_cases(self): + """Test your model with dummy inputs. + In general, you only need to run this test to check your InferneceModel implementation. + In case the default inputs makes your model failed for some reason (not because of assert in `test_with_default_inputs`), + you can comment out this test. + """ + self.model.test_with_default_inputs() + diff --git a/model_upload/visual_classsifier/README.md b/model_upload/visual_classsifier/README.md new file mode 100644 index 0000000..b8b583d --- /dev/null +++ b/model_upload/visual_classsifier/README.md @@ -0,0 +1,35 @@ +## Image Classification Model Examples + +These can be used on the fly with minimal or no changes to test deploy image classification models to the Clarifai platform. See the required files section for each model below. + +* ### [VIT Age Classifier](./age_vit/) + + Required files to run tests locally: + + Download the [model checkpoint from huggingface](https://huggingface.co/nateraw/vit-age-classifier/tree/main) and store it under `age_vit/checkpoint/` + ``` + huggingface-cli download nateraw/vit-age-classifier --local-dir age_vit/checkpoint/ --local-dir-use-symlinks False + ``` + + Install dependencies to test locally: + + ```bash + $ pip install -r age_vit/requirements.txt + ``` + + Deploy the model to Clarifai: + + >Note: you can skip testing by setting `--no-test` flag for `build` and `upload` command + + 1. Build + + ```bash + $ clarifai build model ./age_vit + ``` + upload `*.clarifai` file to storage to obtain direct download url + + 2. Upload + + ```bash + $ clarifai upload model ./age_vit --url + ``` diff --git a/model_upload/visual_classsifier/age_vit/clarifai_config.yaml b/model_upload/visual_classsifier/age_vit/clarifai_config.yaml new file mode 100644 index 0000000..0bf02e6 --- /dev/null +++ b/model_upload/visual_classsifier/age_vit/clarifai_config.yaml @@ -0,0 +1,19 @@ +clarifai_model: + clarifai_model_id: '' + clarifai_user_app_id: '' + description: '' + inference_parameters: [] + labels: + - 0-2 + - 3-9 + - 10-19 + - 20-29 + - 30-39 + - 40-49 + - 50-59 + - 60-69 + - more than 70 + type: visual-classifier +serving_backend: + triton: + max_batch_size: 4 diff --git a/model_upload/visual_classsifier/age_vit/inference.py b/model_upload/visual_classsifier/age_vit/inference.py new file mode 100644 index 0000000..20ce422 --- /dev/null +++ b/model_upload/visual_classsifier/age_vit/inference.py @@ -0,0 +1,52 @@ +# User model inference script. 
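+# Visual classifier wrapper: loads the ViT age-classifier checkpoint from the local ./checkpoint directory
+# and returns softmax scores (ClassifierOutput) over the age-range labels listed in clarifai_config.yaml.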
+ +import os +from pathlib import Path +from typing import Dict, Union + +import torch +from scipy.special import softmax +from transformers import AutoImageProcessor, ViTForImageClassification + +from clarifai.models.model_serving.model_config import * + + +class InferenceModel(VisualClassifier): + """User model inference class.""" + + def __init__(self) -> None: + """ + Load inference time artifacts that are called frequently .e.g. models, tokenizers, etc. + in this method so they are loaded only once for faster inference. + """ + # current directory + self.base_path: Path = os.path.dirname(__file__) + model_path = os.path.join(self.base_path, "checkpoint") + self.transforms = AutoImageProcessor.from_pretrained(model_path) + self.model = ViTForImageClassification.from_pretrained(model_path) + self.device = "cuda:0" if torch.cuda.is_available() else "cpu" + + def predict(self, input_data: list, + inference_parameters: Dict[str, Union[str, float, int]]) -> list: + """ Custom prediction function for `visual-classifier` model. + + Args: + input_data (List[np.ndarray]): List of image + inference_parameters (Dict[str, Union[str, float, int]]): your inference parameters + + Returns: + list of ClassifierOutput + + """ + + # Transform image and pass it to the model + inputs = self.transforms(input_data, return_tensors='pt') + with torch.no_grad(): + preds = self.model(**inputs).logits + outputs = [] + for pred in preds: + pred_scores = softmax( + pred.detach().numpy()) # alt: softmax(output.logits[0].detach().numpy()) + outputs.append(ClassifierOutput(predicted_scores=pred_scores)) + + return outputs diff --git a/model_upload/visual_classsifier/age_vit/requirements.txt b/model_upload/visual_classsifier/age_vit/requirements.txt new file mode 100644 index 0000000..00fb098 --- /dev/null +++ b/model_upload/visual_classsifier/age_vit/requirements.txt @@ -0,0 +1,7 @@ +clarifai +tritonclient[all] +torch==1.13.1 +transformers==4.38.0 +scipy==1.10.1 +sentencepiece==0.1.99 +protobuf<4.21.3 \ No newline at end of file diff --git a/model_upload/visual_classsifier/age_vit/test.py b/model_upload/visual_classsifier/age_vit/test.py new file mode 100644 index 0000000..3e90821 --- /dev/null +++ b/model_upload/visual_classsifier/age_vit/test.py @@ -0,0 +1,40 @@ +import unittest + +from clarifai.models.model_serving.repo_build import BaseTest + + +class CustomTest(unittest.TestCase): + """ + BaseTest loads the InferenceModel from the inference.py file in the current working directory. + To execute the predict method of the InferenceModel, use the predict method in BaseTest. + It takes the exact same inputs and inference parameters, returning the same outputs as InferenceModel.predict. + The difference is that BaseTest.predict verifies your_infer_parameters against config.clarifai_models.inference_parameters and checks the output values. + + For example, test input value of visual-classifier + + def test_input(self): + import cv2 + path = "path/to/image" + img = cv2.imread(path) + outputs = self.model.predict([img], infer_param1=..., infer_param2=...) + print(outputs) + assert outputs + + """ + + def setUp(self) -> None: + your_infer_parameter = dict( + ) # for example dict(float_var=0.12, string_var="test", _secret_string_var="secret") + self.model = BaseTest(your_infer_parameter) + + def test_default_cases(self): + """Test your model with dummy inputs. + In general, you only need to run this test to check your InferneceModel implementation. 
+ In case the default inputs makes your model failed for some reason (not because of assert in `test_with_default_inputs`), + you can comment out this test. + """ + self.model.test_with_default_inputs() + + def test_specific_case1(self): + """ Implement your test case""" + pass diff --git a/model_upload/visual_detector/Readme.md b/model_upload/visual_detector/Readme.md new file mode 100644 index 0000000..153daea --- /dev/null +++ b/model_upload/visual_detector/Readme.md @@ -0,0 +1,87 @@ +# Visual Detection Model Examples + +These can be used on the fly with minimal or no changes to test deploy visual detection models to the Clarifai platform. See the required files section for each model below and deployment instruction. + +## [yolof](./yolof/) + +[YOLOF](https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc3/configs/yolof) Requirements to run tests locally: + +Download checkpoint and save it in `yolof/config/`: + +```bash +$ wget -P yolof/config https://download.openmmlab.com/mmdetection/v2.0/yolof/yolof_r50_c5_8x8_1x_coco/yolof_r50_c5_8x8_1x_coco_20210425_024427-8e864411.pth +``` + +Install dependecies to test locally + +```bash +$ pip install -r yolof/requirements.txt +``` + +## Torch serve model format [faster-rcnn_torchserve](./faster-rcnn_torchserve/) + +To utilize a Torch serve model (.mar file) created by running torch-model-archiver – essentially a zip file containing the model checkpoint, Python code, and other components – within this module, follow these steps: + +1. Unzip the .mar file to obtain your checkpoint. +2. Implement your postprocess method in inference.py. + +For example: [Faster-RCNN example](https://github.com/pytorch/serve/tree/master/examples/object_detector/fast-rcnn), suppose you already have .mar file following the torch serve example + +unzip it to `./faster-rcnn_torchserve/model_store/hub/checkpoints` as the Torch cache is configured to use this folder in torch serve inference.py. + +```bash +$ unzip faster_rcnn.mar -d ./faster-rcnn_torchserve/model_store/hub/checkpoints/ +``` + +```bash +# in model_store/hub/checkpoints you will have +model_store/hub/checkpoints/ +├── MAR-INF +│ └── MANIFEST.json +├── model.py +└── fasterrcnn_resnet50_fpn_coco-258fb6c6.pth +``` + +Install dependecies to test locally + +```bash +$ pip install -r faster-rcnn_torchserve/requirements.txt +``` + +## [YOLOX](./yolox/) + +[YOLOX](https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc3/configs/yolox) Requirements to run tests locally: + +Download checkpoint and save it in `yolox/configs/yolox/`, e.g download `x` type of model: + +```bash +$ wget -P yolox/configs/yolox/ https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth +``` +>Note: If you want to use a different model type or checkpoint, remember to update the `checkpoint` and `config_path` in the `inference.py` file accordingly. + +Install dependecies to test locally + +```bash +$ pip install -r yolox/requirements.txt +``` + + +## Deploy the model to Clarifai + +Steps to deploy one of above examples after downloading weights and testing to the Clarifai platform. + +>Note: set `--no-test` flag for `build` and `upload` command to disable testing + +1. Build + +```bash +$ clarifai build model # either `faster-rcnn_torchserve` or `yolof` or `yolox` +``` + +upload `*.clarifai` file to storage to obtain direct download url + +2. 
Upload + +```bash +$ clarifai upload model --url +``` \ No newline at end of file diff --git a/model_upload/visual_detector/faster-rcnn_torchserve/clarifai_config.yaml b/model_upload/visual_detector/faster-rcnn_torchserve/clarifai_config.yaml new file mode 100644 index 0000000..b41913a --- /dev/null +++ b/model_upload/visual_detector/faster-rcnn_torchserve/clarifai_config.yaml @@ -0,0 +1,101 @@ +clarifai_model: + clarifai_model_id: '' + clarifai_user_app_id: '' + description: '' + inference_parameters: [] + labels: + - __background__ + - person + - bicycle + - car + - motorcycle + - airplane + - bus + - train + - truck + - boat + - traffic light + - fire hydrant + - N/A + - stop sign + - parking meter + - bench + - bird + - cat + - dog + - horse + - sheep + - cow + - elephant + - bear + - zebra + - giraffe + - N/A + - backpack + - umbrella + - N/A + - N/A + - handbag + - tie + - suitcase + - frisbee + - skis + - snowboard + - sports ball + - kite + - baseball bat + - baseball glove + - skateboard + - surfboard + - tennis racket + - bottle + - N/A + - wine glass + - cup + - fork + - knife + - spoon + - bowl + - banana + - apple + - sandwich + - orange + - broccoli + - carrot + - hot dog + - pizza + - donut + - cake + - chair + - couch + - potted plant + - bed + - N/A + - dining table + - N/A + - N/A + - toilet + - N/A + - tv + - laptop + - mouse + - remote + - keyboard + - cell phone + - microwave + - oven + - toaster + - sink + - refrigerator + - N/A + - book + - clock + - vase + - scissors + - teddy bear + - hair drier + - toothbrush + type: visual-detector +serving_backend: + triton: + max_batch_size: 4 diff --git a/model_upload/visual_detector/faster-rcnn_torchserve/inference.py b/model_upload/visual_detector/faster-rcnn_torchserve/inference.py new file mode 100644 index 0000000..24611bd --- /dev/null +++ b/model_upload/visual_detector/faster-rcnn_torchserve/inference.py @@ -0,0 +1,68 @@ +import os + +ROOT = os.path.dirname(__file__) +os.environ['TORCH_HOME'] = os.path.join(ROOT, "model_store") + +from pathlib import Path # noqa: E402 +import numpy as np # noqa: E402 +import torch # noqa: E402 +from PIL import Image # noqa: E402 +from torchvision import models, transforms # noqa: E402 + +from pathlib import Path +from typing import Dict, Union +from clarifai.models.model_serving.model_config import * # noqa + + +class InferenceModel(VisualDetector): + """User model inference class.""" + + def __init__(self) -> None: + """ + Load inference time artifacts that are called frequently .e.g. models, tokenizers, etc. + in this method so they are loaded only once for faster inference. + """ + # current directory + self.base_path: Path = os.path.dirname(__file__) + #self.checkpoint = os.path.join(ROOT, "model_store/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth") + self.device = "cuda:0" if torch.cuda.is_available() else "cpu" + + self.transform = transforms.Compose([ + transforms.ToTensor(), + ]) + self.model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True) + self.model = self.model.to(self.device) + self.model.eval() + + def predict(self, input_data: list, + inference_parameters: Dict[str, Union[str, float, int, bool]] = {}) -> list: + """ Custom prediction function for `visual-detector` model. 
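+
+    Example (a minimal local-usage sketch, not the serving path; the image path
+    is a placeholder and OpenCV is assumed to be available for decoding):
+
+        import cv2
+        model = InferenceModel()
+        outputs = model.predict([cv2.imread("path/to/image.jpg")])
+        # outputs[0] is the VisualDetectorOutput for the single input image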
+ + Args: + input_data (List[np.ndarray]): List of image + inference_parameters (Dict[str, Union[str, float, int, bool]]): your inference parameters + + Returns: + list of VisualDetectorOutput + + """ + outputs = [] + + input_tensor = [self.transform(Image.fromarray(each)) for each in input_data] + input_tensor = torch.stack(input_tensor).to(self.device) + + with torch.no_grad(): + predictions = self.model(input_tensor) + + for inp_data, preds in zip(input_data, predictions): + boxes = preds["boxes"].cpu().numpy() + labels = preds["labels"].detach().cpu().numpy() + scores = preds["scores"].detach().cpu().numpy() + h, w = inp_data.shape[:2] + # convert model output to clarifai detection output format + output = VisualDetector.postprocess(width=w, height=h, labels=labels, scores=scores, xyxy_boxes=boxes) + outputs.append(output) + + # return list of VisualDetectorOutput + return outputs + \ No newline at end of file diff --git a/model_upload/visual_detector/faster-rcnn_torchserve/model_store/hub/checkpoints/keep b/model_upload/visual_detector/faster-rcnn_torchserve/model_store/hub/checkpoints/keep new file mode 100644 index 0000000..e69de29 diff --git a/model_upload/visual_detector/faster-rcnn_torchserve/requirements.txt b/model_upload/visual_detector/faster-rcnn_torchserve/requirements.txt new file mode 100644 index 0000000..b7a40ae --- /dev/null +++ b/model_upload/visual_detector/faster-rcnn_torchserve/requirements.txt @@ -0,0 +1,3 @@ +tritonclient[all] +clarifai>9.10.4 +torch==2.0.1 diff --git a/model_upload/visual_detector/faster-rcnn_torchserve/test.py b/model_upload/visual_detector/faster-rcnn_torchserve/test.py new file mode 100644 index 0000000..a862983 --- /dev/null +++ b/model_upload/visual_detector/faster-rcnn_torchserve/test.py @@ -0,0 +1,36 @@ +import unittest + +from clarifai.models.model_serving.repo_build import BaseTest + + +class CustomTest(unittest.TestCase): + """ + BaseTest loads the InferenceModel from the inference.py file in the current working directory. + To execute the predict method of the InferenceModel, use the predict method in BaseTest. + It takes the exact same inputs and inference parameters, returning the same outputs as InferenceModel.predict. + The difference is that BaseTest.predict verifies your_infer_parameters against config.clarifai_models.inference_parameters and checks the output values. + + For example, test input value of visual-classifier + + def test_input(self): + import cv2 + path = "path/to/image" + img = cv2.imread(path) + outputs = self.model.predict([img], infer_param1=..., infer_param2=...) + print(outputs) + assert outputs + + """ + + def setUp(self) -> None: + your_infer_parameter = dict( + ) # for example dict(float_var=0.12, string_var="test", _secret_string_var="secret") + self.model = BaseTest(your_infer_parameter) + + def test_default_cases(self): + """Test your model with dummy inputs. + In general, you only need to run this test to check your InferneceModel implementation. + In case the default inputs makes your model failed for some reason (not because of assert in `test_with_default_inputs`), + you can comment out this test. 
+ """ + self.model.test_with_default_inputs() diff --git a/model_upload/visual_detector/yolof/clarifai_config.yaml b/model_upload/visual_detector/yolof/clarifai_config.yaml new file mode 100644 index 0000000..10102b7 --- /dev/null +++ b/model_upload/visual_detector/yolof/clarifai_config.yaml @@ -0,0 +1,90 @@ +clarifai_model: + clarifai_model_id: '' + clarifai_user_app_id: '' + description: '' + inference_parameters: [] + labels: + - person + - bicycle + - car + - motorcycle + - airplane + - bus + - train + - truck + - boat + - traffic-light + - fire-hydrant + - stop-sign + - parking-meter + - bench + - bird + - cat + - dog + - horse + - sheep + - cow + - elephant + - bear + - zebra + - giraffe + - backpack + - umbrella + - handbag + - tie + - suitcase + - frisbee + - skis + - snowboard + - sports-ball + - kite + - baseball-bat + - baseball-glove + - skateboard + - surfboard + - tennis-racket + - bottle + - wine-glass + - cup + - fork + - knife + - spoon + - bowl + - banana + - apple + - sandwich + - orange + - broccoli + - carrot + - hot-dog + - pizza + - donut + - cake + - chair + - couch + - potted-plant + - bed + - dining-table + - toilet + - tv + - laptop + - mouse + - remote + - keyboard + - cell-phone + - microwave + - oven + - toaster + - sink + - refrigerator + - book + - clock + - vase + - scissors + - teddy-bear + - hair-drier + - toothbrush + type: visual-detector +serving_backend: + triton: + max_batch_size: 4 diff --git a/model_upload/visual_detector/yolof/config/yolof_r50_c5_8x8_1x_coco.py b/model_upload/visual_detector/yolof/config/yolof_r50_c5_8x8_1x_coco.py new file mode 100644 index 0000000..0536afb --- /dev/null +++ b/model_upload/visual_detector/yolof/config/yolof_r50_c5_8x8_1x_coco.py @@ -0,0 +1,245 @@ +auto_scale_lr = dict(base_batch_size=64, enable=False) +data_root = 'data/coco/' +dataset_type = 'CocoDataset' +default_hooks = dict( + checkpoint=dict(interval=1, type='CheckpointHook'), + logger=dict(interval=50, type='LoggerHook'), + param_scheduler=dict(type='ParamSchedulerHook'), + sampler_seed=dict(type='DistSamplerSeedHook'), + timer=dict(type='IterTimerHook'), + visualization=dict(type='DetVisualizationHook')) +default_scope = 'mmdet' +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) +file_client_args = dict(backend='disk') +load_from = None +log_level = 'INFO' +log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50) +model = dict( + backbone=dict( + depth=50, + frozen_stages=1, + init_cfg=dict(checkpoint='open-mmlab://detectron/resnet50_caffe', type='Pretrained'), + norm_cfg=dict(requires_grad=False, type='BN'), + norm_eval=True, + num_stages=4, + out_indices=(3,), + style='caffe', + type='ResNet'), + bbox_head=dict( + anchor_generator=dict( + ratios=[ + 1.0, + ], scales=[ + 1, + 2, + 4, + 8, + 16, + ], strides=[ + 32, + ], type='AnchorGenerator'), + bbox_coder=dict( + add_ctr_clamp=True, + ctr_clamp=32, + target_means=[ + 0.0, + 0.0, + 0.0, + 0.0, + ], + target_stds=[ + 1.0, + 1.0, + 1.0, + 1.0, + ], + type='DeltaXYWHBBoxCoder'), + in_channels=512, + loss_bbox=dict(loss_weight=1.0, type='GIoULoss'), + loss_cls=dict(alpha=0.25, gamma=2.0, loss_weight=1.0, type='FocalLoss', use_sigmoid=True), + num_classes=80, + reg_decoded_bbox=True, + type='YOLOFHead'), + data_preprocessor=dict( + bgr_to_rgb=False, + mean=[ + 103.53, + 116.28, + 123.675, + ], + pad_size_divisor=32, + std=[ + 1.0, + 1.0, + 1.0, + ], + type='DetDataPreprocessor'), + neck=dict( + 
block_dilations=[ + 2, + 4, + 6, + 8, + ], + block_mid_channels=128, + in_channels=2048, + num_residual_blocks=4, + out_channels=512, + type='DilatedEncoder'), + test_cfg=dict( + max_per_img=100, + min_bbox_size=0, + nms=dict(iou_threshold=0.6, type='nms'), + nms_pre=1000, + score_thr=0.05), + train_cfg=dict( + allowed_border=-1, + assigner=dict(neg_ignore_thr=0.7, pos_ignore_thr=0.15, type='UniformAssigner'), + debug=False, + pos_weight=-1), + type='YOLOF') +optim_wrapper = dict( + optimizer=dict(lr=0.12, momentum=0.9, type='SGD', weight_decay=0.0001), + paramwise_cfg=dict( + custom_keys=dict(backbone=dict(lr_mult=0.3333333333333333)), norm_decay_mult=0.0), + type='OptimWrapper') +param_scheduler = [ + dict(begin=0, by_epoch=False, end=1500, start_factor=0.00066667, type='LinearLR'), + dict(begin=0, by_epoch=True, end=12, gamma=0.1, milestones=[ + 8, + 11, + ], type='MultiStepLR'), +] +resume = False +test_cfg = dict(type='TestLoop') +test_dataloader = dict( + batch_size=1, + dataset=dict( + ann_file='annotations/instances_val2017.json', + data_prefix=dict(img='val2017/'), + data_root='data/coco/', + pipeline=[ + dict(file_client_args=dict(backend='disk'), type='LoadImageFromFile'), + dict(keep_ratio=True, scale=( + 1333, + 800, + ), type='Resize'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + 'scale_factor', + ), + type='PackDetInputs'), + ], + test_mode=True, + type='CocoDataset'), + drop_last=False, + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=False, type='DefaultSampler')) +test_evaluator = dict( + ann_file='data/coco/annotations/instances_val2017.json', + format_only=False, + metric='bbox', + type='CocoMetric') +test_pipeline = [ + dict(file_client_args=dict(backend='disk'), type='LoadImageFromFile'), + dict(keep_ratio=True, scale=( + 1333, + 800, + ), type='Resize'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + 'scale_factor', + ), + type='PackDetInputs'), +] +train_cfg = dict(max_epochs=12, type='EpochBasedTrainLoop', val_interval=1) +train_dataloader = dict( + batch_sampler=dict(type='AspectRatioBatchSampler'), + batch_size=8, + dataset=dict( + ann_file='annotations/instances_train2017.json', + data_prefix=dict(img='train2017/'), + data_root='data/coco/', + filter_cfg=dict(filter_empty_gt=True, min_size=32), + pipeline=[ + dict(file_client_args=dict(backend='disk'), type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(keep_ratio=True, scale=( + 1333, + 800, + ), type='Resize'), + dict(prob=0.5, type='RandomFlip'), + dict(max_shift_px=32, prob=0.5, type='RandomShift'), + dict(type='PackDetInputs'), + ], + type='CocoDataset'), + num_workers=8, + persistent_workers=True, + sampler=dict(shuffle=True, type='DefaultSampler')) +train_pipeline = [ + dict(file_client_args=dict(backend='disk'), type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(keep_ratio=True, scale=( + 1333, + 800, + ), type='Resize'), + dict(prob=0.5, type='RandomFlip'), + dict(max_shift_px=32, prob=0.5, type='RandomShift'), + dict(type='PackDetInputs'), +] +val_cfg = dict(type='ValLoop') +val_dataloader = dict( + batch_size=1, + dataset=dict( + ann_file='annotations/instances_val2017.json', + data_prefix=dict(img='val2017/'), + data_root='data/coco/', + pipeline=[ + dict(file_client_args=dict(backend='disk'), type='LoadImageFromFile'), + dict(keep_ratio=True, scale=( + 1333, + 
800, + ), type='Resize'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + 'scale_factor', + ), + type='PackDetInputs'), + ], + test_mode=True, + type='CocoDataset'), + drop_last=False, + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=False, type='DefaultSampler')) +val_evaluator = dict( + ann_file='data/coco/annotations/instances_val2017.json', + format_only=False, + metric='bbox', + type='CocoMetric') +vis_backends = [ + dict(type='LocalVisBackend'), +] +visualizer = dict( + name='visualizer', type='DetLocalVisualizer', vis_backends=[ + dict(type='LocalVisBackend'), + ]) diff --git a/model_upload/visual_detector/yolof/inference.py b/model_upload/visual_detector/yolof/inference.py new file mode 100644 index 0000000..8a61827 --- /dev/null +++ b/model_upload/visual_detector/yolof/inference.py @@ -0,0 +1,62 @@ +# User model inference script. + +import os +from pathlib import Path +from typing import Dict, Union + +import numpy as np +import torch +from mmdet.apis import inference_detector, init_detector +from mmdet.utils import register_all_modules + +# Initialize the DetInferencer +register_all_modules() + +from clarifai.models.model_serving.model_config import * # noqa + + +class InferenceModel(VisualDetector): + """User model inference class.""" + + def __init__(self) -> None: + """ + Load inference time artifacts that are called frequently .e.g. models, tokenizers, etc. + in this method so they are loaded only once for faster inference. + """ + # current directory + self.base_path: Path = os.path.dirname(__file__) + self.checkpoint = os.path.join(self.base_path, + "config/yolof_r50_c5_8x8_1x_coco_20210425_024427-8e864411.pth") + self.config_path = os.path.join(self.base_path, "config/yolof_r50_c5_8x8_1x_coco.py") + self.device = "cuda:0" if torch.cuda.is_available() else "cpu" + self.model = init_detector(self.config_path, self.checkpoint, device=self.device) + + + def predict(self, input_data: list, + inference_parameters: Dict[str, Union[str, float, int, bool]] = {}) -> list: + """ Custom prediction function for `visual-detector` model. 
+ + Args: + input_data (List[np.ndarray]): List of image + inference_parameters (Dict[str, Union[str, float, int, bool]]): your inference parameters + + Returns: + list of VisualDetectorOutput + + """ + outputs = [] + + predictions = inference_detector(self.model, input_data) + for inp_data, preds in zip(input_data, predictions): + + labels = preds.pred_instances.labels.cpu().numpy() + bboxes = preds.pred_instances.bboxes.cpu().numpy() + scores = preds.pred_instances.scores.cpu().numpy() + h, w, _ = inp_data.shape # input image shape + # convert model output to clarifai detection output format + output = VisualDetector.postprocess( + width=w, height=h, labels=labels, xyxy_boxes=bboxes, scores=scores, max_bbox_count=300) + outputs.append(output) + + # return list of VisualDetectorOutput + return outputs diff --git a/model_upload/visual_detector/yolof/requirements.txt b/model_upload/visual_detector/yolof/requirements.txt new file mode 100644 index 0000000..4e4c647 --- /dev/null +++ b/model_upload/visual_detector/yolof/requirements.txt @@ -0,0 +1,9 @@ +mmdet==3.0.0rc3 +mmcv==2.0.0rc3 +-f https://download.openmmlab.com/mmcv/dist/cu117/torch1.13/index.html + +clarifai +tritonclient[all] +torch==1.13.1 +numpy==1.23.1 +opencv-python-headless diff --git a/model_upload/visual_detector/yolof/test.py b/model_upload/visual_detector/yolof/test.py new file mode 100644 index 0000000..a862983 --- /dev/null +++ b/model_upload/visual_detector/yolof/test.py @@ -0,0 +1,36 @@ +import unittest + +from clarifai.models.model_serving.repo_build import BaseTest + + +class CustomTest(unittest.TestCase): + """ + BaseTest loads the InferenceModel from the inference.py file in the current working directory. + To execute the predict method of the InferenceModel, use the predict method in BaseTest. + It takes the exact same inputs and inference parameters, returning the same outputs as InferenceModel.predict. + The difference is that BaseTest.predict verifies your_infer_parameters against config.clarifai_models.inference_parameters and checks the output values. + + For example, test input value of visual-classifier + + def test_input(self): + import cv2 + path = "path/to/image" + img = cv2.imread(path) + outputs = self.model.predict([img], infer_param1=..., infer_param2=...) + print(outputs) + assert outputs + + """ + + def setUp(self) -> None: + your_infer_parameter = dict( + ) # for example dict(float_var=0.12, string_var="test", _secret_string_var="secret") + self.model = BaseTest(your_infer_parameter) + + def test_default_cases(self): + """Test your model with dummy inputs. + In general, you only need to run this test to check your InferneceModel implementation. + In case the default inputs makes your model failed for some reason (not because of assert in `test_with_default_inputs`), + you can comment out this test. 
+ """ + self.model.test_with_default_inputs() diff --git a/model_upload/visual_detector/yolox/clarifai_config.yaml b/model_upload/visual_detector/yolox/clarifai_config.yaml new file mode 100644 index 0000000..10102b7 --- /dev/null +++ b/model_upload/visual_detector/yolox/clarifai_config.yaml @@ -0,0 +1,90 @@ +clarifai_model: + clarifai_model_id: '' + clarifai_user_app_id: '' + description: '' + inference_parameters: [] + labels: + - person + - bicycle + - car + - motorcycle + - airplane + - bus + - train + - truck + - boat + - traffic-light + - fire-hydrant + - stop-sign + - parking-meter + - bench + - bird + - cat + - dog + - horse + - sheep + - cow + - elephant + - bear + - zebra + - giraffe + - backpack + - umbrella + - handbag + - tie + - suitcase + - frisbee + - skis + - snowboard + - sports-ball + - kite + - baseball-bat + - baseball-glove + - skateboard + - surfboard + - tennis-racket + - bottle + - wine-glass + - cup + - fork + - knife + - spoon + - bowl + - banana + - apple + - sandwich + - orange + - broccoli + - carrot + - hot-dog + - pizza + - donut + - cake + - chair + - couch + - potted-plant + - bed + - dining-table + - toilet + - tv + - laptop + - mouse + - remote + - keyboard + - cell-phone + - microwave + - oven + - toaster + - sink + - refrigerator + - book + - clock + - vase + - scissors + - teddy-bear + - hair-drier + - toothbrush + type: visual-detector +serving_backend: + triton: + max_batch_size: 4 diff --git a/model_upload/visual_detector/yolox/configs/_base_/default_runtime.py b/model_upload/visual_detector/yolox/configs/_base_/default_runtime.py new file mode 100644 index 0000000..870e561 --- /dev/null +++ b/model_upload/visual_detector/yolox/configs/_base_/default_runtime.py @@ -0,0 +1,24 @@ +default_scope = 'mmdet' + +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', interval=1), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='DetVisualizationHook')) + +env_cfg = dict( + cudnn_benchmark=False, + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + dist_cfg=dict(backend='nccl'), +) + +vis_backends = [dict(type='LocalVisBackend')] +visualizer = dict( + type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') +log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) + +log_level = 'INFO' +load_from = None +resume = False diff --git a/model_upload/visual_detector/yolox/configs/_base_/schedules/schedule_1x.py b/model_upload/visual_detector/yolox/configs/_base_/schedules/schedule_1x.py new file mode 100644 index 0000000..95f30be --- /dev/null +++ b/model_upload/visual_detector/yolox/configs/_base_/schedules/schedule_1x.py @@ -0,0 +1,28 @@ +# training schedule for 1x +train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), + dict( + type='MultiStepLR', + begin=0, + end=12, + by_epoch=True, + milestones=[8, 11], + gamma=0.1) +] + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) + +# Default setting for scaling LR automatically +# - `enable` means enable scaling LR automatically +# or not by default. +# - `base_batch_size` = (8 GPUs) x (2 samples per GPU). 
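+# Worked example (an illustration of the linear scaling rule, not a value used
+# here since `enable` stays False): a run with 4 GPUs x 2 samples per GPU has a
+# total batch of 8, so the optimizer LR above would be multiplied by
+# 8 / 16 = 0.5 when auto scaling is enabled.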
+auto_scale_lr = dict(enable=False, base_batch_size=16) diff --git a/model_upload/visual_detector/yolox/configs/yolox/yolox_l_8xb8-300e_coco.py b/model_upload/visual_detector/yolox/configs/yolox/yolox_l_8xb8-300e_coco.py new file mode 100644 index 0000000..2a4b287 --- /dev/null +++ b/model_upload/visual_detector/yolox/configs/yolox/yolox_l_8xb8-300e_coco.py @@ -0,0 +1,8 @@ +_base_ = './yolox_s_8xb8-300e_coco.py' + +# model settings +model = dict( + backbone=dict(deepen_factor=1.0, widen_factor=1.0), + neck=dict( + in_channels=[256, 512, 1024], out_channels=256, num_csp_blocks=3), + bbox_head=dict(in_channels=256, feat_channels=256)) diff --git a/model_upload/visual_detector/yolox/configs/yolox/yolox_m_8xb8-300e_coco.py b/model_upload/visual_detector/yolox/configs/yolox/yolox_m_8xb8-300e_coco.py new file mode 100644 index 0000000..d82f9e9 --- /dev/null +++ b/model_upload/visual_detector/yolox/configs/yolox/yolox_m_8xb8-300e_coco.py @@ -0,0 +1,8 @@ +_base_ = './yolox_s_8xb8-300e_coco.py' + +# model settings +model = dict( + backbone=dict(deepen_factor=0.67, widen_factor=0.75), + neck=dict(in_channels=[192, 384, 768], out_channels=192, num_csp_blocks=2), + bbox_head=dict(in_channels=192, feat_channels=192), +) diff --git a/model_upload/visual_detector/yolox/configs/yolox/yolox_nano_8xb8-300e_coco.py b/model_upload/visual_detector/yolox/configs/yolox/yolox_nano_8xb8-300e_coco.py new file mode 100644 index 0000000..3f7a1c5 --- /dev/null +++ b/model_upload/visual_detector/yolox/configs/yolox/yolox_nano_8xb8-300e_coco.py @@ -0,0 +1,11 @@ +_base_ = './yolox_tiny_8xb8-300e_coco.py' + +# model settings +model = dict( + backbone=dict(deepen_factor=0.33, widen_factor=0.25, use_depthwise=True), + neck=dict( + in_channels=[64, 128, 256], + out_channels=64, + num_csp_blocks=1, + use_depthwise=True), + bbox_head=dict(in_channels=64, feat_channels=64, use_depthwise=True)) diff --git a/model_upload/visual_detector/yolox/configs/yolox/yolox_s_8xb8-300e_coco.py b/model_upload/visual_detector/yolox/configs/yolox/yolox_s_8xb8-300e_coco.py new file mode 100644 index 0000000..e78a82f --- /dev/null +++ b/model_upload/visual_detector/yolox/configs/yolox/yolox_s_8xb8-300e_coco.py @@ -0,0 +1,236 @@ +_base_ = ['../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'] + +img_scale = (640, 640) # height, width + +# model settings +model = dict( + type='YOLOX', + data_preprocessor=dict( + type='DetDataPreprocessor', + pad_size_divisor=32, + batch_augments=[ + dict( + type='BatchSyncRandomResize', + random_size_range=(480, 800), + size_divisor=32, + interval=10) + ]), + backbone=dict( + type='CSPDarknet', + deepen_factor=0.33, + widen_factor=0.5, + out_indices=(2, 3, 4), + use_depthwise=False, + spp_kernal_sizes=(5, 9, 13), + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='Swish'), + ), + neck=dict( + type='YOLOXPAFPN', + in_channels=[128, 256, 512], + out_channels=128, + num_csp_blocks=1, + use_depthwise=False, + upsample_cfg=dict(scale_factor=2, mode='nearest'), + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='Swish')), + bbox_head=dict( + type='YOLOXHead', + num_classes=80, + in_channels=128, + feat_channels=128, + stacked_convs=2, + strides=(8, 16, 32), + use_depthwise=False, + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='Swish'), + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + reduction='sum', + loss_weight=1.0), + loss_bbox=dict( + type='IoULoss', + mode='square', + eps=1e-16, + reduction='sum', + 
loss_weight=5.0), + loss_obj=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + reduction='sum', + loss_weight=1.0), + loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0)), + train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)), + # In order to align the source code, the threshold of the val phase is + # 0.01, and the threshold of the test phase is 0.001. + test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65))) + +# dataset settings +data_root = 'data/coco/' +dataset_type = 'CocoDataset' + +# file_client_args = dict( +# backend='petrel', +# path_mapping=dict({ +# './data/': 's3://openmmlab/datasets/detection/', +# 'data/': 's3://openmmlab/datasets/detection/' +# })) +file_client_args = dict(backend='disk') + +train_pipeline = [ + dict(type='Mosaic', img_scale=img_scale, pad_val=114.0), + dict( + type='RandomAffine', + scaling_ratio_range=(0.1, 2), + border=(-img_scale[0] // 2, -img_scale[1] // 2)), + dict( + type='MixUp', + img_scale=img_scale, + ratio_range=(0.8, 1.6), + pad_val=114.0), + dict(type='YOLOXHSVRandomAug'), + dict(type='RandomFlip', prob=0.5), + # According to the official implementation, multi-scale + # training is not considered here but in the + # 'mmdet/models/detectors/yolox.py'. + # Resize and Pad are for the last 15 epochs when Mosaic, + # RandomAffine, and MixUp are closed by YOLOXModeSwitchHook. + dict(type='Resize', scale=img_scale, keep_ratio=True), + dict( + type='Pad', + pad_to_square=True, + # If the image is three-channel, the pad value needs + # to be set separately for each channel. + pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False), + dict(type='PackDetInputs') +] + +train_dataset = dict( + # use MultiImageMixDataset wrapper to support mosaic and mixup + type='MultiImageMixDataset', + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='annotations/instances_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=[ + dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadAnnotations', with_bbox=True) + ], + filter_cfg=dict(filter_empty_gt=False, min_size=32)), + pipeline=train_pipeline) + +test_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='Resize', scale=img_scale, keep_ratio=True), + dict( + type='Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] + +train_dataloader = dict( + batch_size=8, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=train_dataset) +val_dataloader = dict( + batch_size=8, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='annotations/instances_val2017.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/instances_val2017.json', + metric='bbox') +test_evaluator = val_evaluator + +# training settings +max_epochs = 300 +num_last_epochs = 15 +interval = 10 + +train_cfg = dict(max_epochs=max_epochs, val_interval=interval) + +# optimizer +# default 8 gpu +base_lr = 0.01 +optim_wrapper = dict( + 
type='OptimWrapper', + optimizer=dict( + type='SGD', lr=base_lr, momentum=0.9, weight_decay=5e-4, + nesterov=True), + paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.)) + +# learning rate +param_scheduler = [ + dict( + # use quadratic formula to warm up 5 epochs + # and lr is updated by iteration + # TODO: fix default scope in get function + type='mmdet.QuadraticWarmupLR', + by_epoch=True, + begin=0, + end=5, + convert_to_iter_based=True), + dict( + # use cosine lr from 5 to 285 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=5, + T_max=max_epochs - num_last_epochs, + end=max_epochs - num_last_epochs, + by_epoch=True, + convert_to_iter_based=True), + dict( + # use fixed lr during last 15 epochs + type='ConstantLR', + by_epoch=True, + factor=1, + begin=max_epochs - num_last_epochs, + end=max_epochs, + ) +] + +default_hooks = dict( + checkpoint=dict( + interval=interval, + max_keep_ckpts=3 # only keep latest 3 checkpoints + )) + +custom_hooks = [ + dict( + type='YOLOXModeSwitchHook', + num_last_epochs=num_last_epochs, + priority=48), + dict(type='SyncNormHook', priority=48), + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0001, + update_buffers=True, + priority=49) +] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/model_upload/visual_detector/yolox/configs/yolox/yolox_tiny_8xb8-300e_coco.py b/model_upload/visual_detector/yolox/configs/yolox/yolox_tiny_8xb8-300e_coco.py new file mode 100644 index 0000000..b4f5bde --- /dev/null +++ b/model_upload/visual_detector/yolox/configs/yolox/yolox_tiny_8xb8-300e_coco.py @@ -0,0 +1,61 @@ +_base_ = './yolox_s_8xb8-300e_coco.py' + +# model settings +model = dict( + data_preprocessor=dict(batch_augments=[ + dict( + type='BatchSyncRandomResize', + random_size_range=(320, 640), + size_divisor=32, + interval=10) + ]), + backbone=dict(deepen_factor=0.33, widen_factor=0.375), + neck=dict(in_channels=[96, 192, 384], out_channels=96), + bbox_head=dict(in_channels=96, feat_channels=96)) + +img_scale = (640, 640) # height, width + +# file_client_args = dict( +# backend='petrel', +# path_mapping=dict({ +# './data/': 's3://openmmlab/datasets/detection/', +# 'data/': 's3://openmmlab/datasets/detection/' +# })) +file_client_args = dict(backend='disk') + +train_pipeline = [ + dict(type='Mosaic', img_scale=img_scale, pad_val=114.0), + dict( + type='RandomAffine', + scaling_ratio_range=(0.5, 1.5), + border=(-img_scale[0] // 2, -img_scale[1] // 2)), + dict(type='YOLOXHSVRandomAug'), + dict(type='RandomFlip', prob=0.5), + # Resize and Pad are for the last 15 epochs when Mosaic and + # RandomAffine are closed by YOLOXModeSwitchHook. 
+ dict(type='Resize', scale=img_scale, keep_ratio=True), + dict( + type='Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False), + dict(type='PackDetInputs') +] + +test_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='Resize', scale=(416, 416), keep_ratio=True), + dict( + type='Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) +val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader diff --git a/model_upload/visual_detector/yolox/configs/yolox/yolox_x_8xb8-300e_coco.py b/model_upload/visual_detector/yolox/configs/yolox/yolox_x_8xb8-300e_coco.py new file mode 100644 index 0000000..34828e0 --- /dev/null +++ b/model_upload/visual_detector/yolox/configs/yolox/yolox_x_8xb8-300e_coco.py @@ -0,0 +1,8 @@ +_base_ = './yolox_s_8xb8-300e_coco.py' + +# model settings +model = dict( + backbone=dict(deepen_factor=1.33, widen_factor=1.25), + neck=dict( + in_channels=[320, 640, 1280], out_channels=320, num_csp_blocks=4), + bbox_head=dict(in_channels=320, feat_channels=320)) diff --git a/model_upload/visual_detector/yolox/inference.py b/model_upload/visual_detector/yolox/inference.py new file mode 100644 index 0000000..a1d4b92 --- /dev/null +++ b/model_upload/visual_detector/yolox/inference.py @@ -0,0 +1,61 @@ +# User model inference script. + +import os +from pathlib import Path +from typing import Dict, Union + +import torch +from mmdet.apis import inference_detector, init_detector +from mmdet.utils import register_all_modules + +# Initialize the DetInferencer +register_all_modules() + +from clarifai.models.model_serving.model_config import * # noqa + + +class InferenceModel(VisualDetector): + """User model inference class.""" + + def __init__(self) -> None: + """ + Load inference time artifacts that are called frequently .e.g. models, tokenizers, etc. + in this method so they are loaded only once for faster inference. + """ + # current directory + self.base_path: Path = os.path.dirname(__file__) + self.checkpoint = os.path.join(self.base_path, + "configs/yolox/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth") + self.config_path = os.path.join(self.base_path, "configs/yolox/yolox_x_8xb8-300e_coco.py") + self.device = "cuda:0" if torch.cuda.is_available() else "cpu" + self.model = init_detector(self.config_path, self.checkpoint, device=self.device) + + + def predict(self, input_data: list, + inference_parameters: Dict[str, Union[str, float, int, bool]] = {}) -> list: + """ Custom prediction function for `visual-detector` model. 
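+
+    Example (an illustrative sketch; assumes the YOLOX checkpoint described in
+    the README has been downloaded and that OpenCV is available; both image
+    paths are placeholders):
+
+        import cv2
+        model = InferenceModel()
+        imgs = [cv2.imread("a.jpg"), cv2.imread("b.jpg")]
+        outputs = model.predict(imgs)  # one VisualDetectorOutput per input image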
+ + Args: + input_data (List[np.ndarray]): List of image + inference_parameters (Dict[str, Union[str, float, int, bool]]): your inference parameters + + Returns: + list of VisualDetectorOutput + + """ + outputs = [] + + predictions = inference_detector(self.model, input_data) + for inp_data, preds in zip(input_data, predictions): + + labels = preds.pred_instances.labels.cpu().numpy() + bboxes = preds.pred_instances.bboxes.cpu().numpy() + scores = preds.pred_instances.scores.cpu().numpy() + h, w, _ = inp_data.shape # input image shape + # convert model output to clarifai detection output format + output = VisualDetector.postprocess( + width=w, height=h, labels=labels, xyxy_boxes=bboxes, scores=scores, max_bbox_count=300) + outputs.append(output) + + # return list of VisualDetectorOutput + return outputs diff --git a/model_upload/visual_detector/yolox/requirements.txt b/model_upload/visual_detector/yolox/requirements.txt new file mode 100644 index 0000000..4e4c647 --- /dev/null +++ b/model_upload/visual_detector/yolox/requirements.txt @@ -0,0 +1,9 @@ +mmdet==3.0.0rc3 +mmcv==2.0.0rc3 +-f https://download.openmmlab.com/mmcv/dist/cu117/torch1.13/index.html + +clarifai +tritonclient[all] +torch==1.13.1 +numpy==1.23.1 +opencv-python-headless diff --git a/model_upload/visual_detector/yolox/test.py b/model_upload/visual_detector/yolox/test.py new file mode 100644 index 0000000..1d8bbb3 --- /dev/null +++ b/model_upload/visual_detector/yolox/test.py @@ -0,0 +1,36 @@ +import unittest + +from clarifai.models.model_serving import BaseTest + + +class CustomTest(unittest.TestCase): + """ + BaseTest loads the InferenceModel from the inference.py file in the current working directory. + To execute the predict method of the InferenceModel, use the predict method in BaseTest. + It takes the exact same inputs and inference parameters, returning the same outputs as InferenceModel.predict. + The difference is that BaseTest.predict verifies your_infer_parameters against config.clarifai_models.inference_parameters and checks the output values. + + For example, test input value of visual-classifier + + def test_input(self): + import cv2 + path = "path/to/image" + img = cv2.imread(path) + outputs = self.model.predict([img], infer_param1=..., infer_param2=...) + print(outputs) + assert outputs + + """ + + def setUp(self) -> None: + your_infer_parameter = dict( + ) # for example dict(float_var=0.12, string_var="test", _secret_string_var="secret") + self.model = BaseTest(your_infer_parameter) + + def test_default_cases(self): + """Test your model with dummy inputs. + In general, you only need to run this test to check your InferneceModel implementation. + In case the default inputs makes your model failed for some reason (not because of assert in `test_with_default_inputs`), + you can comment out this test. + """ + self.model.test_with_default_inputs() diff --git a/model_upload/visual_embedder/README.md b/model_upload/visual_embedder/README.md new file mode 100644 index 0000000..7c6106b --- /dev/null +++ b/model_upload/visual_embedder/README.md @@ -0,0 +1,36 @@ +## Visual Embedding Model Examples + +These can be used on the fly with minimal or no changes to test deploy visual embedding models to the Clarifai platform. See the required files section for each model below. 
+ +* ### [vit-base](./vit-base/) + + Requirements to run tests locally: + + Download the [model checkpoint & sentencepiece bpe model from huggingface](https://huggingface.co/google/vit-base-patch16-224/tree/main) and store it under `vit-base/checkpoint` + ``` + huggingface-cli download google/vit-base-patch16-224 --local-dir vit-base/checkpoint --local-dir-use-symlinks False --exclude *.msgpack *.h5 *.safetensors + ``` + + Install dependecies to test locally + + ```bash + $ pip install -r vit-base/requirements.txt + ``` + + Deploy the model to Clarifai: + + >Note: set `--no-test` flag for `build` and `upload` command to disable testing + + 1. Build + + ```bash + $ clarifai build model ./vit-base + ``` + + upload `*.clarifai` file to storage to obtain direct download url + + 2. Upload + + ```bash + $ clarifai upload model ./vit-base --url + ``` \ No newline at end of file diff --git a/model_upload/visual_embedder/vit-base/clarifai_config.yaml b/model_upload/visual_embedder/vit-base/clarifai_config.yaml new file mode 100644 index 0000000..3c80316 --- /dev/null +++ b/model_upload/visual_embedder/vit-base/clarifai_config.yaml @@ -0,0 +1,37 @@ +# Sample config of inference_parameters and labels +# For detail, please refer to docs +# -------------------- +# inference_parameters: +# - path: boolean_var +# default_value: true +# field_type: 1 +# description: a boolean variable +# - path: string_var +# default_value: "a string" +# field_type: 2 +# description: a string variable +# - path: number_var +# default_value: 1 +# field_type: 3 +# description: a number variable +# - path: secret_string_var +# default_value: "YOUR_SECRET" +# field_type: 21 +# description: a string variable contains secret like API key +# labels: +# - concept1 +# - concept2 +# - concept3 +# - concept4 + + +clarifai_model: + clarifai_model_id: '' + clarifai_user_app_id: '' + description: '' + inference_parameters: [] + labels: [] + type: visual-embedder +serving_backend: + triton: + max_batch_size: 4 diff --git a/model_upload/visual_embedder/vit-base/inference.py b/model_upload/visual_embedder/vit-base/inference.py new file mode 100644 index 0000000..1b6445c --- /dev/null +++ b/model_upload/visual_embedder/vit-base/inference.py @@ -0,0 +1,46 @@ +# User model inference script. + +import os +from pathlib import Path +from typing import Dict, Union + +import torch +from transformers import AutoModel, ViTImageProcessor + +from clarifai.models.model_serving.model_config import * # noqa + + +class InferenceModel(VisualEmbedder): + """User model inference class.""" + + def __init__(self) -> None: + """ + Load inference time artifacts that are called frequently .e.g. models, tokenizers, etc. + in this method so they are loaded only once for faster inference. + """ + # current directory + self.base_path: Path = os.path.dirname(__file__) + self.huggingface_model_path = os.path.join(self.base_path, "checkpoint") + self.processor = ViTImageProcessor.from_pretrained(self.huggingface_model_path) + self.model = AutoModel.from_pretrained(self.huggingface_model_path) + + def predict(self, input_data: list, + inference_parameters: Dict[str, Union[str, float, int]]) -> list: + """ Custom prediction function for `visual-embedder` model. 
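+
+    Example (a local-usage sketch; assumes the checkpoint has been downloaded as
+    described in the README and that OpenCV is available; the image path is a
+    placeholder):
+
+        import cv2
+        model = InferenceModel()
+        outputs = model.predict([cv2.imread("path/to/image.jpg")], {})
+        vector = outputs[0].embedding_vector  # 768-dim CLS embedding for ViT-base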
+ + Args: + input_data (List[np.ndarray]): List of image + inference_parameters (Dict[str, Union[str, float, int]]): your inference parameters + + Returns: + list of EmbeddingOutput + + """ + outputs = [] + inputs = self.processor(images=input_data, return_tensors="pt") + with torch.no_grad(): + embedding_vectors = self.model(**inputs).last_hidden_state[:, 0].cpu().numpy() + for embedding_vector in embedding_vectors: + outputs.append(EmbeddingOutput(embedding_vector=embedding_vector)) + + return outputs diff --git a/model_upload/visual_embedder/vit-base/requirements.txt b/model_upload/visual_embedder/vit-base/requirements.txt new file mode 100644 index 0000000..e79c653 --- /dev/null +++ b/model_upload/visual_embedder/vit-base/requirements.txt @@ -0,0 +1,5 @@ +clarifai +tritonclient[all] +torch==1.13.1 +transformers==4.38.0 +Pillow==10.3.0 diff --git a/model_upload/visual_embedder/vit-base/test.py b/model_upload/visual_embedder/vit-base/test.py new file mode 100644 index 0000000..3e90821 --- /dev/null +++ b/model_upload/visual_embedder/vit-base/test.py @@ -0,0 +1,40 @@ +import unittest + +from clarifai.models.model_serving.repo_build import BaseTest + + +class CustomTest(unittest.TestCase): + """ + BaseTest loads the InferenceModel from the inference.py file in the current working directory. + To execute the predict method of the InferenceModel, use the predict method in BaseTest. + It takes the exact same inputs and inference parameters, returning the same outputs as InferenceModel.predict. + The difference is that BaseTest.predict verifies your_infer_parameters against config.clarifai_models.inference_parameters and checks the output values. + + For example, test input value of visual-classifier + + def test_input(self): + import cv2 + path = "path/to/image" + img = cv2.imread(path) + outputs = self.model.predict([img], infer_param1=..., infer_param2=...) + print(outputs) + assert outputs + + """ + + def setUp(self) -> None: + your_infer_parameter = dict( + ) # for example dict(float_var=0.12, string_var="test", _secret_string_var="secret") + self.model = BaseTest(your_infer_parameter) + + def test_default_cases(self): + """Test your model with dummy inputs. + In general, you only need to run this test to check your InferneceModel implementation. + In case the default inputs makes your model failed for some reason (not because of assert in `test_with_default_inputs`), + you can comment out this test. + """ + self.model.test_with_default_inputs() + + def test_specific_case1(self): + """ Implement your test case""" + pass diff --git a/model_upload/visual_segmenter/README.md b/model_upload/visual_segmenter/README.md new file mode 100644 index 0000000..7703197 --- /dev/null +++ b/model_upload/visual_segmenter/README.md @@ -0,0 +1,36 @@ +## Visual Segmentation Model Examples + +These can be used on the fly with minimal or no changes to test deploy visual segmentation models to the Clarifai platform. See the required files section for each model below. + +* ### [segformer-b2](./segformer-b2/) + + Requirements to run tests locally: + + Download/Clone the [huggingface model](https://huggingface.co/mattmdjaga/segformer_b2_clothes) into the **segformer-b2/checkpoint** directory. 
+ ``` + $ huggingface-cli download mattmdjaga/segformer_b2_clothes --local-dir segformer-b2/checkpoint --local-dir-use-symlinks False --exclude *.safetensors optimizer.pt + ``` + + Install dependecies to test locally + + ```bash + $ pip install -r segformer-b2/requirements.txt + ``` + + Deploy the model to Clarifai: + + >Note: set `--no-test` flag for `build` and `upload` command to disable testing + + 1. Build + + ```bash + $ clarifai build model ./segformer-b2 + ``` + + upload `*.clarifai` file to storage to obtain direct download url + + 2. Upload + + ```bash + $ clarifai upload model ./segformer-b2 --url + ``` diff --git a/model_upload/visual_segmenter/segformer-b2/clarifai_config.yaml b/model_upload/visual_segmenter/segformer-b2/clarifai_config.yaml new file mode 100644 index 0000000..937bd27 --- /dev/null +++ b/model_upload/visual_segmenter/segformer-b2/clarifai_config.yaml @@ -0,0 +1,28 @@ +clarifai_model: + clarifai_model_id: '' + clarifai_user_app_id: '' + description: '' + inference_parameters: [] + labels: + - background + - hat + - hair + - sunglass + - upper-clothes + - skirt + - pants + - dress + - belt + - left-shoe + - right-shoe + - face + - left-leg + - right-leg + - left-arm + - right-arm + - bag + - scarf + type: visual-segmenter +serving_backend: + triton: + max_batch_size: 4 diff --git a/model_upload/visual_segmenter/segformer-b2/inference.py b/model_upload/visual_segmenter/segformer-b2/inference.py new file mode 100644 index 0000000..f570433 --- /dev/null +++ b/model_upload/visual_segmenter/segformer-b2/inference.py @@ -0,0 +1,49 @@ +# User model inference script. + +import os +from pathlib import Path +from typing import Dict, Union + +import torch +from transformers import AutoModelForSemanticSegmentation, SegformerImageProcessor + +from clarifai.models.model_serving.model_config import * # noqa + + +class InferenceModel(VisualSegmenter): + """User model inference class.""" + + def __init__(self) -> None: + """ + Load inference time artifacts that are called frequently .e.g. models, tokenizers, etc. + in this method so they are loaded only once for faster inference. + """ + # current directory + self.base_path: Path = os.path.dirname(__file__) + self.huggingface_model_path = os.path.join(self.base_path, "checkpoint") + self.processor = SegformerImageProcessor.from_pretrained(self.huggingface_model_path) + self.model = AutoModelForSemanticSegmentation.from_pretrained(self.huggingface_model_path) + + def predict(self, input_data: list, + inference_parameters: Dict[str, Union[str, float, int]]) -> list: + """ Custom prediction function for `visual-segmenter` model. 
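+
+    Example (a local-usage sketch under the same setup assumptions as the README;
+    the image path is a placeholder):
+
+        import cv2
+        model = InferenceModel()
+        outputs = model.predict([cv2.imread("path/to/image.jpg")], {})
+        mask = outputs[0].predicted_mask  # 2-D array of class indices at the model's output resolution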
+ + Args: + input_data (List[np.ndarray]): List of image + inference_parameters (Dict[str, Union[str, float, int]]): your inference parameters + + Returns: + list of MasksOutput + + """ + outputs = [] + + inputs = self.processor(images=input_data, return_tensors="pt") + with torch.no_grad(): + output = self.model(**inputs) + logits = output.logits.cpu() + for logit in logits: + mask = logit.argmax(dim=0).numpy() + outputs.append(MasksOutput(predicted_mask=mask)) + + return outputs diff --git a/model_upload/visual_segmenter/segformer-b2/requirements.txt b/model_upload/visual_segmenter/segformer-b2/requirements.txt new file mode 100644 index 0000000..963a573 --- /dev/null +++ b/model_upload/visual_segmenter/segformer-b2/requirements.txt @@ -0,0 +1,5 @@ +clarifai +torch==1.13.1 +tritonclient[all] +transformers==4.38.0 +Pillow==10.3.0 diff --git a/model_upload/visual_segmenter/segformer-b2/test.py b/model_upload/visual_segmenter/segformer-b2/test.py new file mode 100644 index 0000000..3e90821 --- /dev/null +++ b/model_upload/visual_segmenter/segformer-b2/test.py @@ -0,0 +1,40 @@ +import unittest + +from clarifai.models.model_serving.repo_build import BaseTest + + +class CustomTest(unittest.TestCase): + """ + BaseTest loads the InferenceModel from the inference.py file in the current working directory. + To execute the predict method of the InferenceModel, use the predict method in BaseTest. + It takes the exact same inputs and inference parameters, returning the same outputs as InferenceModel.predict. + The difference is that BaseTest.predict verifies your_infer_parameters against config.clarifai_models.inference_parameters and checks the output values. + + For example, test input value of visual-classifier + + def test_input(self): + import cv2 + path = "path/to/image" + img = cv2.imread(path) + outputs = self.model.predict([img], infer_param1=..., infer_param2=...) + print(outputs) + assert outputs + + """ + + def setUp(self) -> None: + your_infer_parameter = dict( + ) # for example dict(float_var=0.12, string_var="test", _secret_string_var="secret") + self.model = BaseTest(your_infer_parameter) + + def test_default_cases(self): + """Test your model with dummy inputs. + In general, you only need to run this test to check your InferneceModel implementation. + In case the default inputs makes your model failed for some reason (not because of assert in `test_with_default_inputs`), + you can comment out this test. + """ + self.model.test_with_default_inputs() + + def test_specific_case1(self): + """ Implement your test case""" + pass diff --git a/models/model_upload/README.md b/models/model_upload/README.md index 332f5cd..8adcb26 100644 --- a/models/model_upload/README.md +++ b/models/model_upload/README.md @@ -2,8 +2,6 @@ Clarifai provides an easy-to-use platform to serve AI/ML models in production. -This feature is currently in Private Preview. We'd love for you to try it out and provide your feedback. To do so please sign up for Private Preview [here](https://forms.gle/MSx7QNxmug2oFZYD6). - There are collection of pre-built model examples for different tasks. This guide will walk you through the process of uploading custom models to the Clarifai platform, leveraging pre-built model examples for different tasks. You'll also learn to customize models and adjust configurations for deployment.