Commit b901a78 (1 parent: 34869ce)
Showing 8 changed files with 397 additions and 0 deletions.
@@ -0,0 +1,39 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
from __future__ import annotations

from typing import List, Optional

from pydantic import Field

from camel.configs.base_config import BaseConfig


class PHIConfig(BaseConfig):
    r"""Configuration for running `microsoft/Phi-3.5-vision-instruct` through
    the vLLM backend, covering model loading (`trust_remote_code`,
    `max_model_len`, `limit_mm_per_prompt`) as well as sampling and prompting
    options (`temperature`, `max_tokens`, `stop_token_ids`, `method`,
    `image_urls`, `question`).
    """

    model: str = "microsoft/Phi-3.5-vision-instruct"
    trust_remote_code: bool = True
    max_model_len: int = 4096
    limit_mm_per_prompt: dict = Field(default_factory=lambda: {"image": 2})
    temperature: float = 0.0
    max_tokens: int = 128
    stop_token_ids: Optional[List[int]] = None
    method: str = "generate"
    image_urls: List[str] = Field(default_factory=list)
    question: str = ""

    class Config:
        arbitrary_types_allowed = True


PHI_API_PARAMS = {param for param in PHIConfig.model_fields.keys()}
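
PHI_API_PARAMS simply collects the field names defined on PHIConfig; the model backend later checks user-supplied config keys against this set (see check_model_config below). A minimal sketch, not part of the commit, of how that validation plays out; the config value here is illustrative:

from camel.configs import PHI_API_PARAMS, PHIConfig

# Build a config and dump it to the plain dict the backend expects.
config_dict = PHIConfig(question="Describe the image.").dict()

# Every key comes from a PHIConfig field, so nothing is flagged as unexpected.
unknown = set(config_dict) - PHI_API_PARAMS
assert not unknown, f"Unexpected arguments: {unknown}"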
@@ -0,0 +1,142 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
from typing import Any, Dict, List, Optional, Union

from vllm import LLM, SamplingParams
from vllm.multimodal.utils import fetch_image

from camel.configs import PHI_API_PARAMS, PHIConfig
from camel.messages import OpenAIMessage
from camel.models import BaseModelBackend
from camel.types import ChatCompletion, ChatCompletionChunk, ModelType
from camel.utils import BaseTokenCounter, OpenAITokenCounter


class PHIModel(BaseModelBackend):
    r"""vLLM-backed model backend for the Phi-3.5 vision model."""

    def __init__(
        self,
        model_type: ModelType,
        model_config_dict: Dict[str, Any],
        api_key: Optional[str] = None,
        url: Optional[str] = None,
        token_counter: Optional[BaseTokenCounter] = None,
    ) -> None:
        super().__init__(
            model_type=model_type,
            model_config_dict=model_config_dict,
            api_key=api_key,
            url=url,
        )
        self._token_counter = token_counter
        self.config = PHIConfig(**model_config_dict)
        self.llm = LLM(
            model=self.config.model,
            trust_remote_code=self.config.trust_remote_code,
            max_model_len=self.config.max_model_len,
            limit_mm_per_prompt=self.config.limit_mm_per_prompt,
        )

    @property
    def token_counter(self) -> BaseTokenCounter:
        r"""Initialize the token counter for the model backend.

        Returns:
            BaseTokenCounter: The token counter following the model's
                tokenization style.
        """
        if not self._token_counter:
            self._token_counter = OpenAITokenCounter(ModelType.GPT_4O_MINI)
        return self._token_counter

    def run(
        self,
        messages: List[OpenAIMessage],
    ) -> Union[ChatCompletion, ChatCompletionChunk]:
        # The latest user message carries the question; images come from config.
        question = messages[-1]['content']
        image_urls = self.config.image_urls
        image_data = [fetch_image(url) for url in image_urls]

        sampling_params = SamplingParams(
            temperature=self.config.temperature,
            max_tokens=self.config.max_tokens,
            stop_token_ids=self.config.stop_token_ids,
        )

        if self.config.method == "generate":
            # Build the Phi-3.5-vision prompt with one <|image_i|> tag per image.
            placeholders = "\n".join(
                f"<|image_{i}|>" for i, _ in enumerate(image_urls, start=1)
            )
            prompt = (
                f"<|user|>\n{placeholders}\n{question}<|end|>\n<|assistant|>\n"
            )
            outputs = self.llm.generate(
                {"prompt": prompt, "multi_modal_data": {"image": image_data}},
                sampling_params=sampling_params,
            )
        elif self.config.method == "chat":
            outputs = self.llm.chat(
                [
                    {
                        "role": "user",
                        "content": [{"type": "text", "text": question}]
                        + [
                            {"type": "image_url", "image_url": {"url": url}}
                            for url in image_urls
                        ],
                    }
                ],
                sampling_params=sampling_params,
            )
        else:
            raise ValueError(f"Invalid method: {self.config.method}")

        # Convert vLLM output to OpenAI-like format
        response = ChatCompletion(
            id="vllm_response",
            object="chat.completion",
            created=0,
            model=self.config.model,
            choices=[
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": outputs[0].outputs[0].text,
                    },
                    "finish_reason": "stop",
                }
            ],
            usage={
                "prompt_tokens": 0,
                "completion_tokens": 0,
                "total_tokens": 0,
            },
        )
        return response

    def check_model_config(self):
        for param in self.model_config_dict:
            if param not in PHI_API_PARAMS:
                raise ValueError(
                    f"Unexpected argument `{param}` is "
                    "input into the PHI model backend."
                )

    @property
    def token_limit(self) -> int:
        return self.config.max_model_len

    @property
    def stream(self) -> bool:
        return False  # Streaming is not supported in this vLLM-based backend.
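
The run() method branches on config.method: the default "generate" path builds a raw Phi-3.5-vision prompt with <|image_i|> placeholders, while "chat" hands message formatting to vLLM's chat API. A rough sketch, not part of the commit, of driving the "chat" path through ModelFactory; it assumes the factory returns this backend directly, the image URL is a placeholder, and ModelPlatformType.PHI / ModelType.PHI_3_5_VISION are taken from the example below:

from camel.configs import PHIConfig
from camel.models import ModelFactory
from camel.types import ModelPlatformType, ModelType

chat_config = PHIConfig(
    method="chat",  # route run() through self.llm.chat() instead of generate()
    image_urls=["https://example.com/sample.jpg"],  # placeholder URL
)
phi_model = ModelFactory.create(
    model_platform=ModelPlatformType.PHI,
    model_type=ModelType.PHI_3_5_VISION,
    model_config_dict=chat_config.dict(),
)

# run() reads the question from the last message and the images from the config;
# the field layout of the result follows the ChatCompletion built in run() above.
response = phi_model.run([{"role": "user", "content": "What is in the image?"}])
print(response.choices[0].message.content)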
@@ -0,0 +1,138 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
from camel.agents import ChatAgent
from camel.configs import PHIConfig
from camel.messages import BaseMessage
from camel.models import ModelFactory
from camel.types import ModelPlatformType, ModelType

# Example image URLs
IMAGE_URLS = [
    "https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg",
]

# Create PHIConfig
phi_config = PHIConfig(
    model="microsoft/Phi-3.5-vision-instruct",
    image_urls=IMAGE_URLS,
    question="What is the content of each image?",
)

# Create the PHI model backend
phi_model = ModelFactory.create(
    model_platform=ModelPlatformType.PHI,
    model_type=ModelType.PHI_3_5_VISION,
    model_config_dict=phi_config.dict(),
)

# Define system message
sys_msg = BaseMessage.make_assistant_message(
    role_name="Assistant", content="You are a helpful assistant."
)

# Set agent
camel_agent = ChatAgent(system_message=sys_msg, model=phi_model)

user_msg = BaseMessage.make_user_message(
    role_name="User",
    content="""Say hi to CAMEL AI, one open-source community dedicated to the
    study of autonomous and communicative agents.""",
)

# Get response information
response = camel_agent.step(user_msg)
print(response.msgs[0].content)

"""
===============================================================================
INFO 09-20 15:54:42 llm_engine.py:223]
Initializing an LLM engine (v0.6.1.post2)
with config: model='microsoft/Phi-3.5-vision-instruct',
speculative_config=None, tokenizer='microsoft/Phi-3.5-vision-instruct',
skip_tokenizer_init=False, tokenizer_mode=auto, revision=None,
override_neuron_config=None, rope_scaling=None, rope_theta=None,
tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16,
max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO,
tensor_parallel_size=1, pipeline_parallel_size=1,
disable_custom_all_reduce=False,
quantization=None, enforce_eager=False, kv_cache_dtype=auto,
quantization_param_path=None, device_config=cuda,
decoding_config=DecodingConfig(guided_decoding_backend='outlines'),
observability_config=ObservabilityConfig(otlp_traces_endpoint=None,
collect_model_forward_time=False, collect_model_execute_time=False),
seed=0, served_model_name=microsoft/Phi-3.5-vision-instruct,
use_v2_block_manager=False, num_scheduler_steps=1,
enable_prefix_caching=False, use_async_output_proc=True)
INFO 09-20 15:54:42 selector.py:240]
Cannot use FlashAttention-2 backend due to sliding window.
INFO 09-20 15:54:42 selector.py:116] Using XFormers backend.
/home/mi/anaconda3/envs/camel/lib/python3.10/site-packages/xformers/ops/fmha/flash.py:211:
FutureWarning: `torch.library.impl_abstract`
was renamed to `torch.library.register_fake`.
Please use that instead; we will remove
`torch.library.impl_abstract` in a future version of PyTorch.
@torch.library.impl_abstract("xformers_flash::flash_fwd")
/home/mi/anaconda3/envs/camel/lib/python3.10/site-packages/xformers/ops/fmha/flash.py:344:
FutureWarning: `torch.library.impl_abstract`
was renamed to `torch.library.register_fake`.
Please use that instead; we will remove
`torch.library.impl_abstract` in a future version of PyTorch.
@torch.library.impl_abstract("xformers_flash::flash_bwd")
INFO 09-20 15:54:43 model_runner.py:997]
Starting to load model microsoft/Phi-3.5-vision-instruct...
INFO 09-20 15:54:43 selector.py:240]
Cannot use FlashAttention-2 backend due to sliding window.
INFO 09-20 15:54:43 selector.py:116] Using XFormers backend.
INFO 09-20 15:54:44 weight_utils.py:242]
Using model weights format ['*.safetensors']
Loading safetensors checkpoint shards: 0% Completed | 0/2 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:
50% Completed | 1/2 [00:00<00:00, 3.10it/s]
Loading safetensors checkpoint shards:
100% Completed | 2/2 [00:00<00:00, 2.48it/s]
Loading safetensors checkpoint shards:
100% Completed | 2/2 [00:00<00:00, 2.56it/s]
INFO 09-20 15:54:46 model_runner.py:1008] Loading model weights took 7.7498 GB
/home/mi/anaconda3/envs/camel/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py:513:
FutureWarning: The image_processor_class argument
is deprecated and will be removed in v4.42.
Please use `slow_image_processor_class`,
or `fast_image_processor_class` instead
warnings.warn(
INFO 09-20 15:54:47 gpu_executor.py:122] # GPU blocks: 2197, # CPU blocks: 682
INFO 09-20 15:54:48 model_runner.py:1311] Capturing the model for CUDA graphs.
This may lead to unexpected consequences if the model is not static.
To run the model in eager mode,
set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 09-20 15:54:48 model_runner.py:1315]
CUDA graphs can take additional 1~3 GiB memory per GPU.
If you are running out of memory,
consider decreasing `gpu_memory_utilization` or enforcing eager mode.
You can also reduce the `max_num_seqs` as needed to decrease memory usage.
INFO 09-20 15:54:57 model_runner.py:1430] Graph capturing finished in 9 secs.
Processed prompts: 100%|██████████████████████████████████|
1/1 [00:01<00:00, 1.43s/it, est.
speed input: 1090.18 toks/s, output: 53.32 toks/s]
The first image shows a duck floating on water,
with its reflection visible on the surface.
The duck has a green head, yellow bill,
and a brown body with white patches.
The second image depicts a lion sitting in a grassy field.
The lion has a golden mane and is looking directly at the camera.
===============================================================================
"""