Merged
43 commits
93d4e7f
feat: add transformers qwen2.5 omni
weedge Apr 12, 2025
6944130
feat: add transformers qwen2.5 omni modal demo
weedge Apr 12, 2025
ac829c8
feat: add transformers qwen2.5 omni modal demo
weedge Apr 12, 2025
1549a13
change run_omni_cases.sh
weedge Apr 12, 2025
d7a375b
run batch requests cases
weedge Apr 12, 2025
28e00b0
add qwen2.5 omni web demo on modal
weedge Apr 12, 2025
d42ce73
add async to sync generator
weedge Apr 13, 2025
1d9cb39
add src/llm/vllm/qwen2_5omni.py
weedge Apr 14, 2025
f4f832a
change asr stream
weedge Apr 14, 2025
c69a18c
fix: qwen2.5 omni apply_chat_template system content check
weedge Apr 15, 2025
f68fb91
fix: system content
weedge Apr 15, 2025
d8a2b9d
feat: add thinker_talker_inference_stream and Qwen2_5OmniForCondition…
weedge Apr 16, 2025
995a76d
feat: add omni_chatting_stream and qwen2_code2wav torch_compile_mode…
weedge Apr 17, 2025
4f3d181
feat: add thinker_chunk_stream omni_chatting_stream omni_chatting_seg…
weedge Apr 19, 2025
0939f81
fix talker_generate_chunk
weedge Apr 19, 2025
b771aa6
feat: use first thinker generate hidden states for talker_inputs_embeds
weedge Apr 19, 2025
2ec9a2e
feat: use first thinker generate hidden states for talker_inputs_embeds
weedge Apr 20, 2025
7db983c
fix: thinker_generate_chunk thinker_new_hidden_states thinker_new_hid…
weedge Apr 20, 2025
d22749a
fix: thinker_generate_chunk thinker_new_hidden_states thinker_new_hid…
weedge Apr 20, 2025
21c2d9a
feat: add Qwen2_5OmniForConditionalGenerationStreaming model, Transfo…
weedge Apr 21, 2025
15cc298
add session history chat
weedge Apr 21, 2025
98359cf
add LivekitAsrQwen2_5OmniVoiceBot LivekitQwen2_5OmniVoiceBot
weedge Apr 21, 2025
485dbc0
feat: add qwen2_5omni_asr and unit test
weedge Apr 21, 2025
00ac0d9
feat: add LivekitQwen2_5OmniVisionVoiceBot
weedge Apr 21, 2025
cfa39e9
change pyproject add llm_transformers_manual_vision_voice_qwen
weedge Apr 21, 2025
026d03b
fix: qwen2_5omni_asr test
weedge Apr 21, 2025
7c7e8c0
fix: qwen2_5omni_asr test
weedge Apr 21, 2025
dd3a5e0
fix: add get_qwen2_5omni_transformers_args
weedge Apr 21, 2025
e44e2c1
feat: add Qwen2_5OmnVisionVoiceProcessor and bot
weedge Apr 21, 2025
122bb01
feat: add Qwen2_5OmnVisionVoiceProcessor and bot
weedge Apr 21, 2025
28359e7
fix: change log
weedge Apr 21, 2025
ce9dd6e
feat: support audio input thinker chunk stream
weedge Apr 22, 2025
93a225c
feat: add image_stream, image_chunk_stream
weedge Apr 22, 2025
7200c21
fix:
weedge Apr 22, 2025
4dfdb12
feat: add thinker_all_talker_stream for only return audio case
weedge Apr 22, 2025
f637be1
fix: warmup
weedge Apr 22, 2025
faace45
feat: add LivekitAsrQwen2_5OmniVoiceBot LivekitQwen2_5OmniVoiceBot Li…
weedge Apr 23, 2025
dfa608d
fix: thinker chunk stream use stop_strings_per_step
weedge Apr 24, 2025
be51a9c
feat: add text/vision/audio -> chunk text+speech stream use sliding …
weedge Apr 24, 2025
d7089f2
change fastapi_webrtc_qwen2_5omni_vision_voice_bot_serve deploy
weedge Apr 25, 2025
e44755e
fix: embedding mask and sleep no_stream_sleep_time yield to allow oth…
weedge Apr 25, 2025
d7b1358
change version
weedge Apr 25, 2025
809a61b
change version
weedge Apr 25, 2025
3 changes: 2 additions & 1 deletion .gitignore
@@ -162,6 +162,7 @@ main-dev.py
tmp.*
*.wav
*.mp3
*.mp4
*.flv
*.vtt

@@ -206,4 +207,4 @@ runs
.ruff_cache/

# xml
*.xml
*.xml
167 changes: 166 additions & 1 deletion deploy/modal/README.md
@@ -174,6 +174,9 @@ IMAGE_NAME=minicpmo IMAGE_CONCURRENT_CN=1 IMAGE_GPU=L4 modal serve -e achatbot s

# moonshotai/Kimi-VL-A3B-Instruct (or Thinking) use 2xL4 like deepseek-ai/deepseek-vl2-small
IMAGE_NAME=kimi IMAGE_CONCURRENT_CN=1 IMAGE_GPU=L4:2 modal serve -e achatbot src/fastapi_webrtc_vision_bot_serve.py

# webrtc_vision_bot serve on qwen2.5omni vision llm pip image
IMAGE_NAME=qwen2.5omni IMAGE_CONCURRENT_CN=1 IMAGE_GPU=L4 modal serve -e achatbot src/fastapi_webrtc_vision_bot_serve.py
```
- curl api to run chat room bot with webrtc (daily/livekit/agora)
```shell
@@ -384,7 +387,7 @@ curl --location 'https://weedge-achatbot--fastapi-webrtc-freeze-omni-voice-bo-4b
```

### webrtc_minicpmo_vision_voice_bot
- run webrtc_minicpmo_vision_voice_bot serve with task queue(redis)
- run webrtc_minicpmo_vision_voice_bot serve
```shell
# webrtc_audio_bot serve on the default pip image
# create a Modal Secret from .env.example for the webrtc keys
@@ -560,6 +563,168 @@ curl --location 'https://weedge-achatbot--fastapi-webrtc-minicpmo-omni-bot-srv-a
"config_list": []
}'
```
### webrtc_qwen2_5omni_vision_voice_bot
- run webrtc_qwen2_5omni_vision_voice_bot serve with webrtc
```shell
# webrtc_qwen2_5omni_vision_voice_bot serve on the default pip image
# create a Modal Secret named achatbot from .env.example for the webrtc keys
IMAGE_CONCURRENT_CN=1 IMAGE_GPU=L40s modal serve -e achatbot src/fastapi_webrtc_qwen2_5omni_vision_voice_bot_serve.py
```
- curl api to run chat room bot with webrtc (livekit_room)
```shell
# thinker generates chunk tokens and hidden states -> talker generates VQ code tokens -> code2wav generates wav chunks | without use_sliding_window_code2wav
curl --location 'https://weedge-achatbot--fastapi-webrtc-qwen2-5omni-bot-srv-app-dev.modal.run/bot_join/chat-room/LivekitQwen2_5OmniVisionVoiceBot' \
--header 'Content-Type: application/json' \
--data '{
    "chat_bot_name": "LivekitQwen2_5OmniVisionVoiceBot",
    "room_name": "chat-room",
    "room_url": "",
    "token": "",
    "room_manager": {
        "tag": "livekit_room",
        "args": {
            "bot_name": "LivekitQwen2_5OmniVisionVoiceBot",
            "is_common_session": false
        }
    },
    "services": {
        "pipeline": "achatbot",
        "vad": "silero",
        "omni_llm": "llm_transformers_manual_qwen2_5omni_vision_voice"
    },
    "config": {
        "vad": {
            "tag": "silero_vad_analyzer",
            "args": { "stop_secs": 0.7 }
        },
        "omni_llm": {
            "tag": "llm_transformers_manual_qwen2_5omni_vision_voice",
            "args": {
                "lm_device": "cuda",
                "lm_torch_dtype": "bfloat16",
                "lm_attn_impl": "flash_attention_2",
                "warmup_steps": 1,
                "chat_history_size": 0,
                "thinker_eos_token_ids": [151644, 151645],
                "thinker_args": {
                    "lm_gen_temperature": 0.95,
                    "lm_gen_top_k": 20,
                    "lm_gen_top_p": 0.9,
                    "lm_gen_min_new_tokens": 1,
                    "lm_gen_max_new_tokens": 1024,
                    "lm_gen_max_tokens_per_step": 10,
                    "lm_gen_repetition_penalty": 1.1
                },
                "talker_args": {
                    "lm_gen_temperature": 0.95,
                    "lm_gen_top_k": 20,
                    "lm_gen_top_p": 0.9,
                    "lm_gen_min_new_tokens": 1,
                    "lm_gen_max_new_tokens": 2048,
                    "lm_gen_repetition_penalty": 1.1
                },
                "talker_skip_thinker_token_ids": [],
                "talker_eos_token_ids": [8292, 8294],
                "code2wav_args": {
                    "model_path": "/root/.achatbot/models/Qwen/Qwen2.5-Omni-7B",
                    "enable_torch_compile": false,
                    "enable_torch_compile_first_chunk": false,
                    "odeint_method": "euler",
                    "odeint_method_relaxed": false,
                    "batched_chunk": 3,
                    "frequency": "50hz",
                    "device": "cuda",
                    "num_steps": 10,
                    "guidance_scale": 0.5,
                    "sway_coefficient": -1.0,
                    "code2wav_dynamic_batch": false
                },
                "speaker": "Chelsie",
                "is_use_sliding_window_code2wav": false,
                "lm_model_name_or_path": "/root/.achatbot/models/Qwen/Qwen2.5-Omni-7B"
            }
        }
    },
    "config_list": []
}'
# thinker generates chunk tokens and hidden states -> talker generates VQ code tokens -> code2wav generates wav chunks | with use_sliding_window_code2wav | no torch.compile
curl --location 'https://weedge-achatbot--fastapi-webrtc-qwen2-5omni-bot-srv-app-dev.modal.run/bot_join/chat-room/LivekitQwen2_5OmniVisionVoiceBot' \
--header 'Content-Type: application/json' \
--data '{
    "chat_bot_name": "LivekitQwen2_5OmniVisionVoiceBot",
    "room_name": "chat-room",
    "room_url": "",
    "token": "",
    "room_manager": {
        "tag": "livekit_room",
        "args": {
            "bot_name": "LivekitQwen2_5OmniVisionVoiceBot",
            "is_common_session": false
        }
    },
    "services": {
        "pipeline": "achatbot",
        "vad": "silero",
        "omni_llm": "llm_transformers_manual_qwen2_5omni_vision_voice"
    },
    "config": {
        "vad": {
            "tag": "silero_vad_analyzer",
            "args": { "stop_secs": 0.7 }
        },
        "omni_llm": {
            "tag": "llm_transformers_manual_qwen2_5omni_vision_voice",
            "args": {
                "lm_device": "cuda",
                "lm_torch_dtype": "bfloat16",
                "lm_attn_impl": "flash_attention_2",
                "warmup_steps": 1,
                "chat_history_size": 0,
                "thinker_eos_token_ids": [151644, 151645],
                "thinker_args": {
                    "lm_gen_temperature": 0.95,
                    "lm_gen_top_k": 20,
                    "lm_gen_top_p": 0.9,
                    "lm_gen_min_new_tokens": 1,
                    "lm_gen_max_new_tokens": 1024,
                    "lm_gen_max_tokens_per_step": 10,
                    "lm_gen_repetition_penalty": 1.1
                },
                "talker_args": {
                    "lm_gen_temperature": 0.95,
                    "lm_gen_top_k": 20,
                    "lm_gen_top_p": 0.9,
                    "lm_gen_min_new_tokens": 1,
                    "lm_gen_max_new_tokens": 2048,
                    "lm_gen_repetition_penalty": 1.1
                },
                "talker_skip_thinker_token_ids": [],
                "talker_eos_token_ids": [8292, 8294],
                "code2wav_args": {
                    "model_path": "/root/.achatbot/models/Qwen/Qwen2.5-Omni-7B",
                    "enable_torch_compile": false,
                    "enable_torch_compile_first_chunk": false,
                    "odeint_method": "euler",
                    "odeint_method_relaxed": false,
                    "batched_chunk": 3,
                    "frequency": "50hz",
                    "device": "cuda",
                    "num_steps": 10,
                    "guidance_scale": 0.5,
                    "sway_coefficient": -1.0,
                    "code2wav_dynamic_batch": false
                },
                "speaker": "Chelsie",
                "is_use_sliding_window_code2wav": true,
                "lm_model_name_or_path": "/root/.achatbot/models/Qwen/Qwen2.5-Omni-7B"
            }
        }
    },
    "config_list": []
}'
```
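
Both requests drive the same chunked pipeline and differ only in `is_use_sliding_window_code2wav`. As a rough illustration of the dataflow that `thinker_args`, `talker_args`, and `code2wav_args` configure, here is a minimal sketch; every class and method name in it is a hypothetical stand-in, not the achatbot API:

```python
# Hypothetical sketch of the thinker -> talker -> code2wav chunk stream.
# `thinker`, `talker`, and `code2wav` stand in for the real achatbot
# components; their method names are illustrative assumptions only.
from dataclasses import dataclass
from typing import Iterator

import numpy as np


@dataclass
class OmniChunk:
    text: str          # incremental text decoded by the thinker
    wav: np.ndarray    # waveform decoded from this chunk's VQ codes


def omni_chat_stream(inputs, thinker, talker, code2wav,
                     max_tokens_per_step: int = 10) -> Iterator[OmniChunk]:
    # Thinker: decode up to lm_gen_max_tokens_per_step tokens per step and
    # keep the hidden states, which condition the talker.
    for text, hidden_states in thinker.generate_chunks(
        inputs, max_tokens_per_step=max_tokens_per_step
    ):
        # Talker: map thinker hidden states to VQ codec tokens
        # (stopping on talker_eos_token_ids).
        vq_codes = talker.generate(hidden_states)
        # code2wav: vocode the codec tokens into audio. With
        # is_use_sliding_window_code2wav=true this step would instead decode
        # a sliding window of codes rather than a full batch.
        wav = code2wav.decode(vq_codes)
        yield OmniChunk(text=text, wav=wav)
```

The sliding-window variant is presumably aimed at lower first-audio latency, since the vocoder can start from a window of codes instead of waiting for a full batched chunk.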

### webrtc_step_voice_bot
- run webrtc_step_voice_bot serve with task queue(redis)
Expand Down
2 changes: 1 addition & 1 deletion deploy/modal/src/download_models.py
@@ -27,7 +27,7 @@
    retries=0,
    cpu=8.0,
    image=download_image,
    secrets=[modal.Secret.from_name("achatbot")],
    # secrets=[modal.Secret.from_name("achatbot")],
    volumes={HF_MODEL_DIR: hf_model_vol},
    timeout=1200,
    scaledown_window=1200,
22 changes: 15 additions & 7 deletions deploy/modal/src/fastapi_webrtc_minicpmo_vision_voice_bot_serve.py
@@ -16,9 +16,6 @@ class ContainerRuntimeConfig:
            "ACHATBOT_PKG": "1",
            "LOG_LEVEL": os.getenv("LOG_LEVEL", "info"),
            "IMAGE_NAME": os.getenv("IMAGE_NAME", "default"),
            "ASR_TAG": "sense_voice_asr",
            "ASR_LANG": "zn",
            "ASR_MODEL_NAME_OR_PATH": "/root/.achatbot/models/FunAudioLLM/SenseVoiceSmall",
            "USE_GPTQ_CKPT": os.getenv("USE_GPTQ_CKPT", ""),
            "LLM_MODEL_NAME_OR_PATH": f'/root/.achatbot/models/{os.getenv("LLM_MODEL_NAME_OR_PATH", "openbmb/MiniCPM-o-2_6")}',
            # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#gpu-feature-list
@@ -42,10 +39,9 @@ class ContainerRuntimeConfig:
            "fastapi_bot_server,"
            "livekit,livekit-api,daily,agora,"
            "silero_vad_analyzer,"
            "sense_voice_asr,deepgram_asr_processor,"
            "llm_transformers_manual_vision_voice_minicpmo,"
            "queue"
            "]~=0.0.8.12",
            "]~=0.0.9.post10",
            "huggingface_hub[hf_transfer]==0.26.0",
            "wget",
        ],
@@ -170,8 +166,20 @@ def setup(self):

    @modal.enter()
    def enter(self):
        print("enter done")
        # volume.reload()
        # runs once when the container starts
        import subprocess
        import torch

        subprocess.run("nvidia-smi --version", shell=True)
        gpu_prop = None
        if torch.cuda.is_available():
            gpu_prop = torch.cuda.get_device_properties("cuda:0")
            print(gpu_prop)
            # use spawn so CUDA can be initialized safely in worker processes
            torch.multiprocessing.set_start_method("spawn", force=True)
        else:
            print("CUDA is not available.")

        # TODO: load the model at startup; for now the API loads it per bot config

    @modal.asgi_app()
    def app(self):
86 changes: 86 additions & 0 deletions deploy/modal/src/fastapi_webrtc_qwen2_5omni_vision_voice_bot_serve.py
@@ -0,0 +1,86 @@
import modal
import os

achatbot_version = os.getenv("ACHATBOT_VERSION", "0.0.9.post10")
qwen2_5omni_img = (
    # https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags
    modal.Image.from_registry(
        "nvidia/cuda:12.6.1-cudnn-devel-ubuntu22.04",
        add_python="3.10",
    )
    .apt_install("git", "git-lfs", "ffmpeg", "clang", "cmake")
    .pip_install("wheel")
    .pip_install(
        [
            "achatbot["
            "fastapi_bot_server,"
            "livekit,livekit-api,daily,agora,"
            "silero_vad_analyzer,asr_processor,"
            "llm_transformers_manual_vision_voice_qwen,"
            "queue"
            f"]=={achatbot_version}",
        ],
        extra_index_url=os.getenv("EXTRA_INDEX_URL", "https://pypi.org/simple/"),
    )
    .run_commands(
        "pip install git+https://github.com/huggingface/[email protected]"
    )
    .pip_install("flash-attn", extra_options="--no-build-isolation")
    .env(
        {
            "ACHATBOT_PKG": "1",
            "LOG_LEVEL": os.getenv("LOG_LEVEL", "info"),
            "LLM_MODEL_NAME_OR_PATH": f'/root/.achatbot/models/{os.getenv("LLM_MODEL_NAME_OR_PATH", "Qwen/Qwen2.5-Omni-7B")}',
            # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#gpu-feature-list
        }
    )
)


# ----------------------- app -------------------------------
app = modal.App("fastapi_webrtc_qwen2_5omni_bot")

HF_MODEL_DIR = "/root/.achatbot/models"
hf_model_vol = modal.Volume.from_name("models", create_if_missing=True)
ASSETS_DIR = "/root/.achatbot/assets"
assets_dir = modal.Volume.from_name("assets", create_if_missing=True)


# Modal containers default to 128 MiB of memory and 0.125 CPU cores
@app.cls(
    image=qwen2_5omni_img,
    gpu=os.getenv("IMAGE_GPU", None),
    secrets=[modal.Secret.from_name("achatbot")],
    volumes={
        HF_MODEL_DIR: hf_model_vol,
        ASSETS_DIR: assets_dir,
    },
    cpu=2.0,
    timeout=1200,  # default 300s
    scaledown_window=1200,
    max_containers=1,
    allow_concurrent_inputs=int(os.getenv("IMAGE_CONCURRENT_CN", "1")),
)
class Srv:
    @modal.enter()
    def enter(self):
        # runs once when the container starts
        import subprocess
        import torch

        subprocess.run("nvidia-smi --version", shell=True)
        gpu_prop = None
        if torch.cuda.is_available():
            gpu_prop = torch.cuda.get_device_properties("cuda:0")
            print(gpu_prop)
            # use spawn so CUDA can be initialized safely in worker processes
            torch.multiprocessing.set_start_method("spawn", force=True)
        else:
            print("CUDA is not available.")

        # TODO: load the model at startup; for now the API loads it per bot config

    @modal.asgi_app()
    def app(self):
        from achatbot.cmd.http.server.fastapi_daily_bot_serve import app as fastapi_app

        return fastapi_app
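
Once this file is served with `modal serve`, it exposes the same FastAPI routes as the other bot servers. A minimal client call might look like the sketch below; the base URL placeholder follows Modal's dev-serve naming from the README, and the payload is abbreviated (the full `omni_llm` config is in the curl examples above):

```python
# Hypothetical minimal client for the dev-served app; replace BASE_URL with
# your own Modal dev-serve URL, and fill in "config" per the README examples.
import requests

BASE_URL = "https://<your-workspace>--fastapi-webrtc-qwen2-5omni-bot-srv-app-dev.modal.run"

payload = {
    "chat_bot_name": "LivekitQwen2_5OmniVisionVoiceBot",
    "room_name": "chat-room",
    "room_manager": {
        "tag": "livekit_room",
        "args": {"bot_name": "LivekitQwen2_5OmniVisionVoiceBot"},
    },
    "services": {
        "pipeline": "achatbot",
        "vad": "silero",
        "omni_llm": "llm_transformers_manual_qwen2_5omni_vision_voice",
    },
    "config": {},  # abbreviated; see the full omni_llm config in deploy/modal/README.md
    "config_list": [],
}

# the path mirrors the curl examples: /bot_join/{room_name}/{bot_name}
resp = requests.post(
    f"{BASE_URL}/bot_join/chat-room/LivekitQwen2_5OmniVisionVoiceBot",
    json=payload,
)
print(resp.status_code, resp.text)
```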
19 changes: 17 additions & 2 deletions deploy/modal/src/fastapi_webrtc_vision_bot_serve.py
@@ -1,7 +1,7 @@
import modal
import os

achatbot_version = os.getenv("ACHATBOT_VERSION", "0.0.9.post8")
achatbot_version = os.getenv("ACHATBOT_VERSION", "0.0.9.post10")

vision_bot_img = (
    # https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags
@@ -10,6 +10,7 @@
        add_python="3.10",
    )
    .apt_install("git", "git-lfs", "ffmpeg", "cmake")
    .pip_install("wheel")
    .pip_install(
        [
            "achatbot["
@@ -25,7 +26,6 @@
        ],
        extra_index_url=os.getenv("EXTRA_INDEX_URL", "https://pypi.org/simple/"),
    )
    .pip_install("wheel")
    .pip_install("flash-attn", extra_options="--no-build-isolation")
    .env(
        {
@@ -123,6 +123,21 @@ class ContainerRuntimeConfig:
                }
            )
        ),
        "qwen2_5omni": (
            vision_bot_img.pip_install(
                [
                    f"achatbot[llm_transformers_manual_vision_voice_qwen]=={achatbot_version}",
                ],
                extra_index_url=os.getenv("EXTRA_INDEX_URL", "https://pypi.org/simple/"),
            )
            .run_commands("pip install git+https://github.com/huggingface/[email protected]")
            .env(
                {
                    "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True",
                    "LLM_MODEL_NAME_OR_PATH": f'/root/.achatbot/models/{os.getenv("LLM_MODEL_NAME_OR_PATH", "Qwen/Qwen2.5-Omni-7B")}',
                }
            )
        ),
    }

    @staticmethod
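
The `qwen2_5omni` entry above extends the per-IMAGE_NAME image map inside `ContainerRuntimeConfig`; the rest of the class is collapsed in this diff. A hypothetical reconstruction of the selection logic it implies (the function name and signature are assumptions, not copied from the file):

```python
# Hypothetical reconstruction of the IMAGE_NAME dispatch; the actual
# ContainerRuntimeConfig methods may differ outside the visible hunks.
import os

import modal


def get_image(images: dict) -> modal.Image:
    """Pick a prebuilt image variant via the IMAGE_NAME env var."""
    name = os.getenv("IMAGE_NAME", "default")
    if name not in images:
        raise ValueError(f"unsupported IMAGE_NAME: {name}; choose one of {list(images)}")
    return images[name]
```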