Commit c217e3b
fix: phi model code format
NeilJohnson0930 committed Nov 5, 2024
1 parent 66d30ca commit c217e3b
Showing 3 changed files with 92 additions and 132 deletions.
40 changes: 39 additions & 1 deletion camel/configs/phi_config.py
@@ -29,11 +29,49 @@ class PHIConfig(BaseConfig):
    max_tokens: int = 128
    stop_token_ids: Optional[List[int]] = None
    method: str = "generate"
    image_urls: List[str] = Field(default_factory=list)
    question: str = ""

    class Config:
        arbitrary_types_allowed = True


PHI_API_PARAMS = {param for param in PHIConfig.model_fields.keys()}

"""
INFO 11-05 21:20:17 config.py:107] Replacing legacy 'type' key with 'rope_type'
WARNING 11-05 21:20:17 config.py:114] Replacing legacy rope_type 'su' with 'longrope'
INFO 11-05 21:20:20 llm_engine.py:237] Initializing an LLM engine (v0.6.3.post1) with config: model='microsoft/Phi-3.5-vision-instruct', speculative_config=None, tokenizer='microsoft/Phi-3.5-vision-instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=microsoft/Phi-3.5-vision-instruct, num_scheduler_steps=1, chunked_prefill_enabled=False multi_step_stream_outputs=True, enable_prefix_caching=False, use_async_output_proc=True, use_cached_outputs=False, mm_processor_kwargs=None)
INFO 11-05 21:20:21 selector.py:247] Cannot use FlashAttention-2 backend due to sliding window.
INFO 11-05 21:20:21 selector.py:115] Using XFormers backend.
/home/mi/.cache/pypoetry/virtualenvs/camel-ai-34ULexV3-py3.10/lib/python3.10/site-packages/xformers/ops/fmha/flash.py:211: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.
@torch.library.impl_abstract("xformers_flash::flash_fwd")
/home/mi/.cache/pypoetry/virtualenvs/camel-ai-34ULexV3-py3.10/lib/python3.10/site-packages/xformers/ops/fmha/flash.py:344: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.
@torch.library.impl_abstract("xformers_flash::flash_bwd")
INFO 11-05 21:20:21 model_runner.py:1056] Starting to load model microsoft/Phi-3.5-vision-instruct...
INFO 11-05 21:20:21 selector.py:247] Cannot use FlashAttention-2 backend due to sliding window.
INFO 11-05 21:20:21 selector.py:115] Using XFormers backend.
INFO 11-05 21:20:22 weight_utils.py:243] Using model weights format ['*.safetensors']
Loading safetensors checkpoint shards: 0% Completed | 0/2 [00:00<?, ?it/s]
Loading safetensors checkpoint shards: 50% Completed | 1/2 [00:00<00:00, 2.87it/s]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:00<00:00, 2.12it/s]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:00<00:00, 2.21it/s]
INFO 11-05 21:20:23 model_runner.py:1067] Loading model weights took 7.9324 GB
/home/mi/.cache/pypoetry/virtualenvs/camel-ai-34ULexV3-py3.10/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py:520: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead
warnings.warn(
INFO 11-05 21:20:25 gpu_executor.py:122] # GPU blocks: 2153, # CPU blocks: 682
INFO 11-05 21:20:25 gpu_executor.py:126] Maximum concurrency for 4096 tokens per request: 8.41x
INFO 11-05 21:20:26 model_runner.py:1395] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 11-05 21:20:26 model_runner.py:1399] CUDA graphs can take additional 1~3 GiB memory per GPU. If you are running out of memory, consider decreasing `gpu_memory_utilization` or enforcing eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.
INFO 11-05 21:20:36 model_runner.py:1523] Graph capturing finished in 10 secs.
prompt_token_ids (old) [1, 32010, 29871, 13, 29966, 29989, 3027, 29918, 29896, 29989, 29958, 13, 5816, 29915, 29879, 297, 278, 1967, 29973, 32007, 29871, 13, 32001]
Processed prompts: 0%| | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:00<00:00, 1.37it/s, est. speed input: 1062.35 toks/s, output: 57.72 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:00<00:00, 1.37it/s, est. speed input: 1062.35 toks/s, output: 57.72 toks/s]
The image shows a close-up of a young goat with a white and grey coat. The goat has a playful expression with its tongue sticking out and its ears perked up.
"""
67 changes: 37 additions & 30 deletions camel/models/phi_model.py
@@ -12,14 +12,22 @@
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
from typing import Any, Dict, List, Optional, Union

import base64
from PIL import Image
import io
from openai import Stream
from vllm import LLM, SamplingParams
from vllm.multimodal.utils import fetch_image

from camel.configs import PHI_API_PARAMS, PHIConfig
from camel.messages import OpenAIMessage
from camel.models import BaseModelBackend
from camel.types import ChatCompletion, ChatCompletionChunk, ModelType
from camel.types import (
    ChatCompletion,
    ChatCompletionChunk,
    CompletionUsage,
    ModelType,
)
from camel.utils import BaseTokenCounter, OpenAITokenCounter


@@ -49,23 +57,23 @@ def __init__(

    @property
    def token_counter(self) -> BaseTokenCounter:
        r"""Initialize the token counter for the model backend.

        Returns:
            BaseTokenCounter: The token counter following the model's
                tokenization style.
        """
        if not self._token_counter:
            self._token_counter = OpenAITokenCounter(ModelType.GPT_4O_MINI)
        return self._token_counter

    def run(
        self,
        messages: List[OpenAIMessage],
    ) -> Union[ChatCompletion, ChatCompletionChunk]:
        question = messages[-1]['content']
        image_urls = self.config.image_urls
        image_data = [fetch_image(url) for url in image_urls]
    ) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]:
        question = messages[1]['content'][0]['text']
        image_urls = messages[1]['content'][1]['image_url']
        if not isinstance(image_urls, list):
            image_urls = [image_urls]

        image_data = messages[1]['content'][1]['image_url']['url']
        base64_data = image_data.split(',')[1]
        image_bytes = base64.b64decode(base64_data)
        image_data = Image.open(io.BytesIO(image_bytes))

        sampling_params = SamplingParams(
            temperature=self.config.temperature,
@@ -85,23 +93,22 @@ def run(
                sampling_params=sampling_params,
            )
        elif self.config.method == "chat":
            chat_messages = [
                {
                    "role": "user",
                    "content": [{"type": "text", "text": question}]
                    + [
                        {"type": "image_url", "image_url": {"url": url}}
                        for url in image_urls
                    ],
                }
            ]
            outputs = self.llm.chat(
                [
                    {
                        "role": "user",
                        "content": [{"type": "text", "text": question}]
                        + [
                            {"type": "image_url", "image_url": {"url": url}}
                            for url in image_urls
                        ],
                    }
                ],
                sampling_params=sampling_params,
                chat_messages, sampling_params=sampling_params
            )
        else:
            raise ValueError(f"Invalid method: {self.config.method}")

        # Convert vLLM output to OpenAI-like format
        response = ChatCompletion(
            id="vllm_response",
            object="chat.completion",
@@ -117,11 +124,11 @@ def run(
                    "finish_reason": "stop",
                }
            ],
            usage={
                "prompt_tokens": 0,
                "completion_tokens": 0,
                "total_tokens": 0,
            },
            usage=CompletionUsage(
                prompt_tokens=0,
                completion_tokens=0,
                total_tokens=0,
            ),
        )
        return response

@@ -139,4 +146,4 @@ def token_limit(self) -> int:

    @property
    def stream(self) -> bool:
        return False  # VLLM doesn't support streaming in this implementation
        return False
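
The rewritten run() no longer fetches URLs; it indexes a fixed message layout and decodes a base64 data URL into a PIL image. The sketch below illustrates that assumed shape and is not code from this commit; "goat.jpg" is a hypothetical local file, and the agent is assumed to put the system message at index 0.

import base64
import io

from PIL import Image

# Build a data URL the way a caller would before invoking run().
with open("goat.jpg", "rb") as f:
    b64 = base64.b64encode(f.read()).decode("utf-8")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "what's in the image?"},
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
            },
        ],
    },
]

# The same indexing and decode path used inside run():
question = messages[1]["content"][0]["text"]
data_url = messages[1]["content"][1]["image_url"]["url"]
image = Image.open(io.BytesIO(base64.b64decode(data_url.split(",")[1])))
print(question, image.size)
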
117 changes: 16 additions & 101 deletions examples/models/phi_model_example.py
@@ -11,128 +11,43 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
import requests
from PIL import Image
from io import BytesIO

from camel.agents import ChatAgent
from camel.configs import PHIConfig
from camel.messages import BaseMessage
from camel.models import ModelFactory
from camel.types import ModelPlatformType, ModelType

# Example image URLs
IMAGE_URLS = [
    "https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg",
]

# Create VLLMConfig
phi_config = PHIConfig(
    model="microsoft/Phi-3.5-vision-instruct",
    image_urls=IMAGE_URLS,
    question="What is the content of each image?",
# Define system message
sys_msg = BaseMessage.make_assistant_message(
    role_name="Assistant",
    content="You are a helpful assistant.",
)

# Create VLLMModel
phi_model = ModelFactory.create(
    model_platform=ModelPlatformType.PHI,
    model_type=ModelType.PHI_3_5_VISION,
    model_config_dict=phi_config.dict(),
)

# Define system message
sys_msg = BaseMessage.make_assistant_message(
    role_name="Assistant", content="You are a helpful assistant."
    model_config_dict=PHIConfig(temperature=0.0).as_dict(),
)

# Set agent
camel_agent = ChatAgent(system_message=sys_msg, model=phi_model)

# Example image URLs
url = "https://www.washingtonian.com/wp-content/uploads/2017/06/6-30-17-goat-yoga-congressional-cemetery-1-994x559.jpg"
response = requests.get(url)
img = Image.open(BytesIO(response.content))

user_msg = BaseMessage.make_user_message(
role_name="User",
content="""Say hi to CAMEL AI, one open-source community dedicated to the
study of autonomous and communicative agents.""",
content="""what's in the image?""",
image_list=[img]
)

# Get response information
response = camel_agent.step(user_msg)
print(response.msgs[0].content)

"""
===============================================================================
INFO 09-20 15:54:42 llm_engine.py:223]
Initializing an LLM engine (v0.6.1.post2)
with config: model='microsoft/Phi-3.5-vision-instruct',
speculative_config=None, tokenizer='microsoft/Phi-3.5-vision-instruct',
skip_tokenizer_init=False, tokenizer_mode=auto, revision=None,
override_neuron_config=None, rope_scaling=None, rope_theta=None,
tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16,
max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO,
tensor_parallel_size=1, pipeline_parallel_size=1,
disable_custom_all_reduce=False,
quantization=None, enforce_eager=False, kv_cache_dtype=auto,
quantization_param_path=None, device_config=cuda,
decoding_config=DecodingConfig(guided_decoding_backend='outlines'),
observability_config=ObservabilityConfig(otlp_traces_endpoint=None,
collect_model_forward_time=False, collect_model_execute_time=False),
seed=0, served_model_name=microsoft/Phi-3.5-vision-instruct,
use_v2_block_manager=False, num_scheduler_steps=1,
enable_prefix_caching=False, use_async_output_proc=True)
INFO 09-20 15:54:42 selector.py:240]
Cannot use FlashAttention-2 backend due to sliding window.
INFO 09-20 15:54:42 selector.py:116] Using XFormers backend.
/home/mi/anaconda3/envs/camel/lib/python3.10/site-packages/xformers/ops/fmha/flash.py:211:
FutureWarning: `torch.library.impl_abstract`
was renamed to `torch.library.register_fake`.
Please use that instead; we will remove
`torch.library.impl_abstract` in a future version of PyTorch.
@torch.library.impl_abstract("xformers_flash::flash_fwd")
/home/mi/anaconda3/envs/camel/lib/python3.10/site-packages/xformers/ops/fmha/flash.py:344:
FutureWarning: `torch.library.impl_abstract`
was renamed to `torch.library.register_fake`.
Please use that instead; we will remove
`torch.library.impl_abstract` in a future version of PyTorch.
@torch.library.impl_abstract("xformers_flash::flash_bwd")
INFO 09-20 15:54:43 model_runner.py:997]
Starting to load model microsoft/Phi-3.5-vision-instruct...
INFO 09-20 15:54:43 selector.py:240]
Cannot use FlashAttention-2 backend due to sliding window.
INFO 09-20 15:54:43 selector.py:116] Using XFormers backend.
INFO 09-20 15:54:44 weight_utils.py:242]
Using model weights format ['*.safetensors']
Loading safetensors checkpoint shards: 0% Completed | 0/2 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:
50% Completed | 1/2 [00:00<00:00, 3.10it/s]
Loading safetensors checkpoint shards:
100% Completed | 2/2 [00:00<00:00, 2.48it/s]
Loading safetensors checkpoint shards:
100% Completed | 2/2 [00:00<00:00, 2.56it/s]
INFO 09-20 15:54:46 model_runner.py:1008] Loading model weights took 7.7498 GB
/home/mi/anaconda3/envs/camel/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py:513:
FutureWarning: The image_processor_class argument
is deprecated and will be removed in v4.42.
Please use `slow_image_processor_class`,
or `fast_image_processor_class` instead
warnings.warn(
INFO 09-20 15:54:47 gpu_executor.py:122] # GPU blocks: 2197, # CPU blocks: 682
INFO 09-20 15:54:48 model_runner.py:1311] Capturing the model for CUDA graphs.
This may lead to unexpected consequences if the model is not static.
To run the model in eager mode,
set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 09-20 15:54:48 model_runner.py:1315]
CUDA graphs can take additional 1~3 GiB memory per GPU.
If you are running out of memory,
consider decreasing `gpu_memory_utilization` or enforcing eager mode.
You can also reduce the `max_num_seqs` as needed to decrease memory usage.
INFO 09-20 15:54:57 model_runner.py:1430] Graph capturing finished in 9 secs.
Processed prompts: 100%|██████████████████████████████████|
1/1 [00:01<00:00, 1.43s/it, est.
speed input: 1090.18 toks/s, output: 53.32 toks/s]
The first image shows a duck floating on water,
with its reflection visible on the surface.
The duck has a green head, yellow bill,
and a brown body with white patches.
The second image depicts a lion sitting in a grassy field.
The lion has a golden mane and is looking directly at the camera.
===============================================================================
"""
