Commit c217e3b
fix: phi model code format
NeilJohnson0930 committed Nov 5, 2024
1 parent 66d30ca commit c217e3b
Showing 3 changed files with 92 additions and 132 deletions.
40 changes: 39 additions & 1 deletion camel/configs/phi_config.py
@@ -29,11 +29,49 @@ class PHIConfig(BaseConfig):
    max_tokens: int = 128
    stop_token_ids: Optional[List[int]] = None
    method: str = "generate"
    image_urls: List[str] = Field(default_factory=list)
    question: str = ""

    class Config:
        arbitrary_types_allowed = True


PHI_API_PARAMS = {param for param in PHIConfig.model_fields.keys()}

"""
INFO 11-05 21:20:17 config.py:107] Replacing legacy 'type' key with 'rope_type'
WARNING 11-05 21:20:17 config.py:114] Replacing legacy rope_type 'su' with 'longrope'
INFO 11-05 21:20:20 llm_engine.py:237] Initializing an LLM engine (v0.6.3.post1) with config: model='microsoft/Phi-3.5-vision-instruct', speculative_config=None, tokenizer='microsoft/Phi-3.5-vision-instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=microsoft/Phi-3.5-vision-instruct, num_scheduler_steps=1, chunked_prefill_enabled=False multi_step_stream_outputs=True, enable_prefix_caching=False, use_async_output_proc=True, use_cached_outputs=False, mm_processor_kwargs=None)
INFO 11-05 21:20:21 selector.py:247] Cannot use FlashAttention-2 backend due to sliding window.
INFO 11-05 21:20:21 selector.py:115] Using XFormers backend.
/home/mi/.cache/pypoetry/virtualenvs/camel-ai-34ULexV3-py3.10/lib/python3.10/site-packages/xformers/ops/fmha/flash.py:211: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.
@torch.library.impl_abstract("xformers_flash::flash_fwd")
/home/mi/.cache/pypoetry/virtualenvs/camel-ai-34ULexV3-py3.10/lib/python3.10/site-packages/xformers/ops/fmha/flash.py:344: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.
@torch.library.impl_abstract("xformers_flash::flash_bwd")
INFO 11-05 21:20:21 model_runner.py:1056] Starting to load model microsoft/Phi-3.5-vision-instruct...
INFO 11-05 21:20:21 selector.py:247] Cannot use FlashAttention-2 backend due to sliding window.
INFO 11-05 21:20:21 selector.py:115] Using XFormers backend.
INFO 11-05 21:20:22 weight_utils.py:243] Using model weights format ['*.safetensors']
Loading safetensors checkpoint shards: 0% Completed | 0/2 [00:00<?, ?it/s]
Loading safetensors checkpoint shards: 50% Completed | 1/2 [00:00<00:00, 2.87it/s]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:00<00:00, 2.12it/s]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:00<00:00, 2.21it/s]
INFO 11-05 21:20:23 model_runner.py:1067] Loading model weights took 7.9324 GB
/home/mi/.cache/pypoetry/virtualenvs/camel-ai-34ULexV3-py3.10/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py:520: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead
warnings.warn(
INFO 11-05 21:20:25 gpu_executor.py:122] # GPU blocks: 2153, # CPU blocks: 682
INFO 11-05 21:20:25 gpu_executor.py:126] Maximum concurrency for 4096 tokens per request: 8.41x
INFO 11-05 21:20:26 model_runner.py:1395] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 11-05 21:20:26 model_runner.py:1399] CUDA graphs can take additional 1~3 GiB memory per GPU. If you are running out of memory, consider decreasing `gpu_memory_utilization` or enforcing eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.
INFO 11-05 21:20:36 model_runner.py:1523] Graph capturing finished in 10 secs.
prompt_token_ids (old) [1, 32010, 29871, 13, 29966, 29989, 3027, 29918, 29896, 29989, 29958, 13, 5816, 29915, 29879, 297, 278, 1967, 29973, 32007, 29871, 13, 32001]
Processed prompts: 0%| | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:00<00:00, 1.37it/s, est. speed input: 1062.35 toks/s, output: 57.72 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:00<00:00, 1.37it/s, est. speed input: 1062.35 toks/s, output: 57.72 toks/s]
The image shows a close-up of a young goat with a white and grey coat. The goat has a playful expression with its tongue sticking out and its ears perked up.
"""
67 changes: 37 additions & 30 deletions camel/models/phi_model.py
@@ -12,14 +12,22 @@
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
from typing import Any, Dict, List, Optional, Union

import base64
from PIL import Image
import io
from openai import Stream
from vllm import LLM, SamplingParams
from vllm.multimodal.utils import fetch_image

from camel.configs import PHI_API_PARAMS, PHIConfig
from camel.messages import OpenAIMessage
from camel.models import BaseModelBackend
from camel.types import ChatCompletion, ChatCompletionChunk, ModelType
from camel.types import (
    ChatCompletion,
    ChatCompletionChunk,
    CompletionUsage,
    ModelType,
)
from camel.utils import BaseTokenCounter, OpenAITokenCounter


@@ -49,23 +57,23 @@ def __init__(

    @property
    def token_counter(self) -> BaseTokenCounter:
        r"""Initialize the token counter for the model backend.

        Returns:
            BaseTokenCounter: The token counter following the model's
                tokenization style.
        """
        if not self._token_counter:
            self._token_counter = OpenAITokenCounter(ModelType.GPT_4O_MINI)
        return self._token_counter

    def run(
        self,
        messages: List[OpenAIMessage],
    ) -> Union[ChatCompletion, ChatCompletionChunk]:
        question = messages[-1]['content']
        image_urls = self.config.image_urls
        image_data = [fetch_image(url) for url in image_urls]
    ) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]:
        question = messages[1]['content'][0]['text']
        image_urls = messages[1]['content'][1]['image_url']
        if not isinstance(image_urls, list):
            image_urls = [image_urls]

        image_data = messages[1]['content'][1]['image_url']['url']
        base64_data = image_data.split(',')[1]
        image_bytes = base64.b64decode(base64_data)
        image_data = Image.open(io.BytesIO(image_bytes))

        sampling_params = SamplingParams(
            temperature=self.config.temperature,
@@ -85,23 +93,22 @@ def run(
                sampling_params=sampling_params,
            )
        elif self.config.method == "chat":
            chat_messages = [
                {
                    "role": "user",
                    "content": [{"type": "text", "text": question}]
                    + [
                        {"type": "image_url", "image_url": {"url": url}}
                        for url in image_urls
                    ],
                }
            ]
            outputs = self.llm.chat(
                [
                    {
                        "role": "user",
                        "content": [{"type": "text", "text": question}]
                        + [
                            {"type": "image_url", "image_url": {"url": url}}
                            for url in image_urls
                        ],
                    }
                ],
                sampling_params=sampling_params,
                chat_messages, sampling_params=sampling_params
            )
        else:
            raise ValueError(f"Invalid method: {self.config.method}")

        # Convert vLLM output to OpenAI-like format
        response = ChatCompletion(
            id="vllm_response",
            object="chat.completion",
@@ -117,11 +124,11 @@ def run(
                    "finish_reason": "stop",
                }
            ],
            usage={
                "prompt_tokens": 0,
                "completion_tokens": 0,
                "total_tokens": 0,
            },
            usage=CompletionUsage(
                prompt_tokens=0,
                completion_tokens=0,
                total_tokens=0,
            ),
        )
        return response

@@ -139,4 +146,4 @@ def token_limit(self) -> int:

    @property
    def stream(self) -> bool:
        return False  # VLLM doesn't support streaming in this implementation
        return False
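
The rewritten run() no longer fetches URLs; it indexes a fixed message layout and decodes a base64 data URL into a PIL image. The sketch below illustrates that assumed shape and is not code from this commit; "goat.jpg" is a hypothetical local file, and the agent is assumed to put the system message at index 0.

import base64
import io

from PIL import Image

# Build a data URL the way a caller would before invoking run().
with open("goat.jpg", "rb") as f:
    b64 = base64.b64encode(f.read()).decode("utf-8")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "what's in the image?"},
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
            },
        ],
    },
]

# The same indexing and decode path used inside run():
question = messages[1]["content"][0]["text"]
data_url = messages[1]["content"][1]["image_url"]["url"]
image = Image.open(io.BytesIO(base64.b64decode(data_url.split(",")[1])))
print(question, image.size)
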
117 changes: 16 additions & 101 deletions examples/models/phi_model_example.py
@@ -11,128 +11,43 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
import requests
from PIL import Image
from io import BytesIO

from camel.agents import ChatAgent
from camel.configs import PHIConfig
from camel.messages import BaseMessage
from camel.models import ModelFactory
from camel.types import ModelPlatformType, ModelType

# Example image URLs
IMAGE_URLS = [
    "https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg",
]

# Create VLLMConfig
phi_config = PHIConfig(
    model="microsoft/Phi-3.5-vision-instruct",
    image_urls=IMAGE_URLS,
    question="What is the content of each image?",
# Define system message
sys_msg = BaseMessage.make_assistant_message(
    role_name="Assistant",
    content="You are a helpful assistant.",
)

# Create VLLMModel
phi_model = ModelFactory.create(
    model_platform=ModelPlatformType.PHI,
    model_type=ModelType.PHI_3_5_VISION,
    model_config_dict=phi_config.dict(),
)

# Define system message
sys_msg = BaseMessage.make_assistant_message(
    role_name="Assistant", content="You are a helpful assistant."
    model_config_dict=PHIConfig(temperature=0.0).as_dict(),
)

# Set agent
camel_agent = ChatAgent(system_message=sys_msg, model=phi_model)

# Example image URLs
url = "https://www.washingtonian.com/wp-content/uploads/2017/06/6-30-17-goat-yoga-congressional-cemetery-1-994x559.jpg"
response = requests.get(url)
img = Image.open(BytesIO(response.content))

user_msg = BaseMessage.make_user_message(
role_name="User",
content="""Say hi to CAMEL AI, one open-source community dedicated to the
study of autonomous and communicative agents.""",
content="""what's in the image?""",
image_list=[img]
)

# Get response information
response = camel_agent.step(user_msg)
print(response.msgs[0].content)

"""
===============================================================================
INFO 09-20 15:54:42 llm_engine.py:223]
Initializing an LLM engine (v0.6.1.post2)
with config: model='microsoft/Phi-3.5-vision-instruct',
speculative_config=None, tokenizer='microsoft/Phi-3.5-vision-instruct',
skip_tokenizer_init=False, tokenizer_mode=auto, revision=None,
override_neuron_config=None, rope_scaling=None, rope_theta=None,
tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16,
max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO,
tensor_parallel_size=1, pipeline_parallel_size=1,
disable_custom_all_reduce=False,
quantization=None, enforce_eager=False, kv_cache_dtype=auto,
quantization_param_path=None, device_config=cuda,
decoding_config=DecodingConfig(guided_decoding_backend='outlines'),
observability_config=ObservabilityConfig(otlp_traces_endpoint=None,
collect_model_forward_time=False, collect_model_execute_time=False),
seed=0, served_model_name=microsoft/Phi-3.5-vision-instruct,
use_v2_block_manager=False, num_scheduler_steps=1,
enable_prefix_caching=False, use_async_output_proc=True)
INFO 09-20 15:54:42 selector.py:240]
Cannot use FlashAttention-2 backend due to sliding window.
INFO 09-20 15:54:42 selector.py:116] Using XFormers backend.
/home/mi/anaconda3/envs/camel/lib/python3.10/site-packages/xformers/ops/fmha/flash.py:211:
FutureWarning: `torch.library.impl_abstract`
was renamed to `torch.library.register_fake`.
Please use that instead; we will remove
`torch.library.impl_abstract` in a future version of PyTorch.
@torch.library.impl_abstract("xformers_flash::flash_fwd")
/home/mi/anaconda3/envs/camel/lib/python3.10/site-packages/xformers/ops/fmha/flash.py:344:
FutureWarning: `torch.library.impl_abstract`
was renamed to `torch.library.register_fake`.
Please use that instead; we will remove
`torch.library.impl_abstract` in a future version of PyTorch.
@torch.library.impl_abstract("xformers_flash::flash_bwd")
INFO 09-20 15:54:43 model_runner.py:997]
Starting to load model microsoft/Phi-3.5-vision-instruct...
INFO 09-20 15:54:43 selector.py:240]
Cannot use FlashAttention-2 backend due to sliding window.
INFO 09-20 15:54:43 selector.py:116] Using XFormers backend.
INFO 09-20 15:54:44 weight_utils.py:242]
Using model weights format ['*.safetensors']
Loading safetensors checkpoint shards: 0% Completed | 0/2 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:
50% Completed | 1/2 [00:00<00:00, 3.10it/s]
Loading safetensors checkpoint shards:
100% Completed | 2/2 [00:00<00:00, 2.48it/s]
Loading safetensors checkpoint shards:
100% Completed | 2/2 [00:00<00:00, 2.56it/s]
INFO 09-20 15:54:46 model_runner.py:1008] Loading model weights took 7.7498 GB
/home/mi/anaconda3/envs/camel/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py:513:
FutureWarning: The image_processor_class argument
is deprecated and will be removed in v4.42.
Please use `slow_image_processor_class`,
or `fast_image_processor_class` instead
warnings.warn(
INFO 09-20 15:54:47 gpu_executor.py:122] # GPU blocks: 2197, # CPU blocks: 682
INFO 09-20 15:54:48 model_runner.py:1311] Capturing the model for CUDA graphs.
This may lead to unexpected consequences if the model is not static.
To run the model in eager mode,
set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 09-20 15:54:48 model_runner.py:1315]
CUDA graphs can take additional 1~3 GiB memory per GPU.
If you are running out of memory,
consider decreasing `gpu_memory_utilization` or enforcing eager mode.
You can also reduce the `max_num_seqs` as needed to decrease memory usage.
INFO 09-20 15:54:57 model_runner.py:1430] Graph capturing finished in 9 secs.
Processed prompts: 100%|██████████████████████████████████|
1/1 [00:01<00:00, 1.43s/it, est.
speed input: 1090.18 toks/s, output: 53.32 toks/s]
The first image shows a duck floating on water,
with its reflection visible on the surface.
The duck has a green head, yellow bill,
and a brown body with white patches.
The second image depicts a lion sitting in a grassy field.
The lion has a golden mane and is looking directly at the camera.
===============================================================================
"""
