Merged
43 commits
93d4e7f
feat: add transformers qwen2.5 omni
weedge Apr 12, 2025
6944130
feat: add transformers qwen2.5 omni modal demo
weedge Apr 12, 2025
ac829c8
feat: add transformers qwen2.5 omni modal demo
weedge Apr 12, 2025
1549a13
change run_omni_cases.sh
weedge Apr 12, 2025
d7a375b
run batch requests cases
weedge Apr 12, 2025
28e00b0
add qwen2.5 omni web demo on modal
weedge Apr 12, 2025
d42ce73
add async to sync generator
weedge Apr 13, 2025
1d9cb39
add src/llm/vllm/qwen2_5omni.py
weedge Apr 14, 2025
f4f832a
change asr stream
weedge Apr 14, 2025
c69a18c
fix: qwen2.5 omni apply_chat_template system content check
weedge Apr 15, 2025
f68fb91
fix: system content
weedge Apr 15, 2025
d8a2b9d
feat: add thinker_talker_inference_stream and Qwen2_5OmniForCondition…
weedge Apr 16, 2025
995a76d
feat: add omni_chatting_stream and qwen2_code2wav torch_compile_mode…
weedge Apr 17, 2025
4f3d181
feat: add thinker_chunk_stream omni_chatting_stream omni_chatting_seg…
weedge Apr 19, 2025
0939f81
fix talker_generate_chunk
weedge Apr 19, 2025
b771aa6
feat: use first thinker generate hidden states for talker_inputs_embeds
weedge Apr 19, 2025
2ec9a2e
feat: use first thinker generate hidden states for talker_inputs_embeds
weedge Apr 20, 2025
7db983c
fix: thinker_generate_chunk thinker_new_hidden_states thinker_new_hid…
weedge Apr 20, 2025
d22749a
fix: thinker_generate_chunk thinker_new_hidden_states thinker_new_hid…
weedge Apr 20, 2025
21c2d9a
feat: add Qwen2_5OmniForConditionalGenerationStreaming model, Transfo…
weedge Apr 21, 2025
15cc298
add session history chat
weedge Apr 21, 2025
98359cf
add LivekitAsrQwen2_5OmniVoiceBot LivekitQwen2_5OmniVoiceBot
weedge Apr 21, 2025
485dbc0
feat: add qwen2_5omni_asr and unit test
weedge Apr 21, 2025
00ac0d9
feat: add LivekitQwen2_5OmniVisionVoiceBot
weedge Apr 21, 2025
cfa39e9
change pyproject add llm_transformers_manual_vision_voice_qwen
weedge Apr 21, 2025
026d03b
fix: qwen2_5omni_asr test
weedge Apr 21, 2025
7c7e8c0
fix: qwen2_5omni_asr test
weedge Apr 21, 2025
dd3a5e0
fix: add get_qwen2_5omni_transformers_args
weedge Apr 21, 2025
e44e2c1
feat: add Qwen2_5OmnVisionVoiceProcessor and bot
weedge Apr 21, 2025
122bb01
feat: add Qwen2_5OmnVisionVoiceProcessor and bot
weedge Apr 21, 2025
28359e7
fix: change log
weedge Apr 21, 2025
ce9dd6e
feat: support audio input thinker chunk stream
weedge Apr 22, 2025
93a225c
feat: add image_stream, image_chunk_stream
weedge Apr 22, 2025
7200c21
fix:
weedge Apr 22, 2025
4dfdb12
feat: add thinker_all_talker_stream for only return audio case
weedge Apr 22, 2025
f637be1
fix: warmup
weedge Apr 22, 2025
faace45
feat: add LivekitAsrQwen2_5OmniVoiceBot LivekitQwen2_5OmniVoiceBot Li…
weedge Apr 23, 2025
dfa608d
fix: thinker chunk stream use stop_strings_per_step
weedge Apr 24, 2025
be51a9c
feat: add text/vision/audio -> chunk text+speech stream use sliding …
weedge Apr 24, 2025
d7089f2
change fastapi_webrtc_qwen2_5omni_vision_voice_bot_serve deploy
weedge Apr 25, 2025
e44755e
fix: embedding mask and sleep no_stream_sleep_time yield to allow oth…
weedge Apr 25, 2025
d7b1358
change version
weedge Apr 25, 2025
809a61b
change version
weedge Apr 25, 2025
3 changes: 2 additions & 1 deletion .gitignore
@@ -162,6 +162,7 @@ main-dev.py
tmp.*
*.wav
*.mp3
*.mp4
*.flv
*.vtt

@@ -206,4 +207,4 @@ runs
.ruff_cache/

# xml
*.xml
*.xml
167 changes: 166 additions & 1 deletion deploy/modal/README.md
@@ -174,6 +174,9 @@ IMAGE_NAME=minicpmo IMAGE_CONCURRENT_CN=1 IMAGE_GPU=L4 modal serve -e achatbot s

# moonshotai/Kimi-VL-A3B-Instruct (or Thinking) use 2xL4 like deepseek-ai/deepseek-vl2-small
IMAGE_NAME=kimi IMAGE_CONCURRENT_CN=1 IMAGE_GPU=L4:2 modal serve -e achatbot src/fastapi_webrtc_vision_bot_serve.py

# webrtc_vision_bot serve on qwen2.5omni vision llm pip image
IMAGE_NAME=qwen2.5omni IMAGE_CONCURRENT_CN=1 IMAGE_GPU=L4 modal serve -e achatbot src/fastapi_webrtc_vision_bot_serve.py
```
- curl api to run chat room bot with webrtc (daily/livekit/agora)
```shell
@@ -384,7 +387,7 @@ curl --location 'https://weedge-achatbot--fastapi-webrtc-freeze-omni-voice-bo-4b
```

### webrtc_minicpmo_vision_voice_bot
- run webrtc_minicpmo_vision_voice_bot serve with task queue(redis)
- run webrtc_minicpmo_vision_voice_bot serve
```shell
# webrtc_audio_bot serve on the default pip image
# create a Modal Secret from .env.example for the webrtc keys
@@ -560,6 +563,168 @@ curl --location 'https://weedge-achatbot--fastapi-webrtc-minicpmo-omni-bot-srv-a
"config_list": []
}'
```
### webrtc_qwen2_5omni_vision_voice_bot
- run webrtc_qwen2_5omni_vision_voice_bot serve with webrtc
```shell
# webrtc_qwen2_5omni_vision_voice_bot serve on the default pip image
# create a Modal Secret named achatbot from .env.example for the webrtc keys
IMAGE_CONCURRENT_CN=1 IMAGE_GPU=L40s modal serve -e achatbot src/fastapi_webrtc_qwen2_5omni_vision_voice_bot_serve.py
```
- curl api to run chat room bot with webrtc (livekit_room)
```shell
# thinker generates chunk tokens and hidden states -> talker generates VQ code tokens -> code2wav generates wav chunks | without use_sliding_window_code2wav
curl --location 'https://weedge-achatbot--fastapi-webrtc-qwen2-5omni-bot-srv-app-dev.modal.run/bot_join/chat-room/LivekitQwen2_5OmniVisionVoiceBot' \
--header 'Content-Type: application/json' \
--data '{
    "chat_bot_name": "LivekitQwen2_5OmniVisionVoiceBot",
    "room_name": "chat-room",
    "room_url": "",
    "token": "",
    "room_manager": {
        "tag": "livekit_room",
        "args": {
            "bot_name": "LivekitQwen2_5OmniVisionVoiceBot",
            "is_common_session": false
        }
    },
    "services": {
        "pipeline": "achatbot",
        "vad": "silero",
        "omni_llm": "llm_transformers_manual_qwen2_5omni_vision_voice"
    },
    "config": {
        "vad": {
            "tag": "silero_vad_analyzer",
            "args": { "stop_secs": 0.7 }
        },
        "omni_llm": {
            "tag": "llm_transformers_manual_qwen2_5omni_vision_voice",
            "args": {
                "lm_device": "cuda",
                "lm_torch_dtype": "bfloat16",
                "lm_attn_impl": "flash_attention_2",
                "warmup_steps": 1,
                "chat_history_size": 0,
                "thinker_eos_token_ids": [151644, 151645],
                "thinker_args": {
                    "lm_gen_temperature": 0.95,
                    "lm_gen_top_k": 20,
                    "lm_gen_top_p": 0.9,
                    "lm_gen_min_new_tokens": 1,
                    "lm_gen_max_new_tokens": 1024,
                    "lm_gen_max_tokens_per_step": 10,
                    "lm_gen_repetition_penalty": 1.1
                },
                "talker_args": {
                    "lm_gen_temperature": 0.95,
                    "lm_gen_top_k": 20,
                    "lm_gen_top_p": 0.9,
                    "lm_gen_min_new_tokens": 1,
                    "lm_gen_max_new_tokens": 2048,
                    "lm_gen_repetition_penalty": 1.1
                },
                "talker_skip_thinker_token_ids": [],
                "talker_eos_token_ids": [8292, 8294],
                "code2wav_args": {
                    "model_path": "/root/.achatbot/models/Qwen/Qwen2.5-Omni-7B",
                    "enable_torch_compile": false,
                    "enable_torch_compile_first_chunk": false,
                    "odeint_method": "euler",
                    "odeint_method_relaxed": false,
                    "batched_chunk": 3,
                    "frequency": "50hz",
                    "device": "cuda",
                    "num_steps": 10,
                    "guidance_scale": 0.5,
                    "sway_coefficient": -1.0,
                    "code2wav_dynamic_batch": false
                },
                "speaker": "Chelsie",
                "is_use_sliding_window_code2wav": false,
                "lm_model_name_or_path": "/root/.achatbot/models/Qwen/Qwen2.5-Omni-7B"
            }
        }
    },
    "config_list": []
}'
# thinker generates chunk tokens and hidden states -> talker generates VQ code tokens -> code2wav generates wav chunks | with use_sliding_window_code2wav | no torch.compile
curl --location 'https://weedge-achatbot--fastapi-webrtc-qwen2-5omni-bot-srv-app-dev.modal.run/bot_join/chat-room/LivekitQwen2_5OmniVisionVoiceBot' \
--header 'Content-Type: application/json' \
--data '{
    "chat_bot_name": "LivekitQwen2_5OmniVisionVoiceBot",
    "room_name": "chat-room",
    "room_url": "",
    "token": "",
    "room_manager": {
        "tag": "livekit_room",
        "args": {
            "bot_name": "LivekitQwen2_5OmniVisionVoiceBot",
            "is_common_session": false
        }
    },
    "services": {
        "pipeline": "achatbot",
        "vad": "silero",
        "omni_llm": "llm_transformers_manual_qwen2_5omni_vision_voice"
    },
    "config": {
        "vad": {
            "tag": "silero_vad_analyzer",
            "args": { "stop_secs": 0.7 }
        },
        "omni_llm": {
            "tag": "llm_transformers_manual_qwen2_5omni_vision_voice",
            "args": {
                "lm_device": "cuda",
                "lm_torch_dtype": "bfloat16",
                "lm_attn_impl": "flash_attention_2",
                "warmup_steps": 1,
                "chat_history_size": 0,
                "thinker_eos_token_ids": [151644, 151645],
                "thinker_args": {
                    "lm_gen_temperature": 0.95,
                    "lm_gen_top_k": 20,
                    "lm_gen_top_p": 0.9,
                    "lm_gen_min_new_tokens": 1,
                    "lm_gen_max_new_tokens": 1024,
                    "lm_gen_max_tokens_per_step": 10,
                    "lm_gen_repetition_penalty": 1.1
                },
                "talker_args": {
                    "lm_gen_temperature": 0.95,
                    "lm_gen_top_k": 20,
                    "lm_gen_top_p": 0.9,
                    "lm_gen_min_new_tokens": 1,
                    "lm_gen_max_new_tokens": 2048,
                    "lm_gen_repetition_penalty": 1.1
                },
                "talker_skip_thinker_token_ids": [],
                "talker_eos_token_ids": [8292, 8294],
                "code2wav_args": {
                    "model_path": "/root/.achatbot/models/Qwen/Qwen2.5-Omni-7B",
                    "enable_torch_compile": false,
                    "enable_torch_compile_first_chunk": false,
                    "odeint_method": "euler",
                    "odeint_method_relaxed": false,
                    "batched_chunk": 3,
                    "frequency": "50hz",
                    "device": "cuda",
                    "num_steps": 10,
                    "guidance_scale": 0.5,
                    "sway_coefficient": -1.0,
                    "code2wav_dynamic_batch": false
                },
                "speaker": "Chelsie",
                "is_use_sliding_window_code2wav": true,
                "lm_model_name_or_path": "/root/.achatbot/models/Qwen/Qwen2.5-Omni-7B"
            }
        }
    },
    "config_list": []
}'
```
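
Both requests drive the same chunked pipeline and differ only in `is_use_sliding_window_code2wav`. As a rough illustration of the dataflow that `thinker_args`, `talker_args`, and `code2wav_args` configure, here is a minimal sketch; every class and method name in it is a hypothetical stand-in, not the achatbot API:

```python
# Hypothetical sketch of the thinker -> talker -> code2wav chunk stream.
# `thinker`, `talker`, and `code2wav` stand in for the real achatbot
# components; their method names are illustrative assumptions only.
from dataclasses import dataclass
from typing import Iterator

import numpy as np


@dataclass
class OmniChunk:
    text: str          # incremental text decoded by the thinker
    wav: np.ndarray    # waveform decoded from this chunk's VQ codes


def omni_chat_stream(inputs, thinker, talker, code2wav,
                     max_tokens_per_step: int = 10) -> Iterator[OmniChunk]:
    # Thinker: decode up to lm_gen_max_tokens_per_step tokens per step and
    # keep the hidden states, which condition the talker.
    for text, hidden_states in thinker.generate_chunks(
        inputs, max_tokens_per_step=max_tokens_per_step
    ):
        # Talker: map thinker hidden states to VQ codec tokens
        # (stopping on talker_eos_token_ids).
        vq_codes = talker.generate(hidden_states)
        # code2wav: vocode the codec tokens into audio. With
        # is_use_sliding_window_code2wav=true this step would instead decode
        # a sliding window of codes rather than a full batch.
        wav = code2wav.decode(vq_codes)
        yield OmniChunk(text=text, wav=wav)
```

The sliding-window variant is presumably aimed at lower first-audio latency, since the vocoder can start from a window of codes instead of waiting for a full batched chunk.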

### webrtc_step_voice_bot
- run webrtc_step_voice_bot serve with task queue(redis)
Expand Down
2 changes: 1 addition & 1 deletion deploy/modal/src/download_models.py
@@ -27,7 +27,7 @@
    retries=0,
    cpu=8.0,
    image=download_image,
    secrets=[modal.Secret.from_name("achatbot")],
    # secrets=[modal.Secret.from_name("achatbot")],
    volumes={HF_MODEL_DIR: hf_model_vol},
    timeout=1200,
    scaledown_window=1200,
22 changes: 15 additions & 7 deletions deploy/modal/src/fastapi_webrtc_minicpmo_vision_voice_bot_serve.py
@@ -16,9 +16,6 @@ class ContainerRuntimeConfig:
            "ACHATBOT_PKG": "1",
            "LOG_LEVEL": os.getenv("LOG_LEVEL", "info"),
            "IMAGE_NAME": os.getenv("IMAGE_NAME", "default"),
            "ASR_TAG": "sense_voice_asr",
            "ASR_LANG": "zn",
            "ASR_MODEL_NAME_OR_PATH": "/root/.achatbot/models/FunAudioLLM/SenseVoiceSmall",
            "USE_GPTQ_CKPT": os.getenv("USE_GPTQ_CKPT", ""),
            "LLM_MODEL_NAME_OR_PATH": f'/root/.achatbot/models/{os.getenv("LLM_MODEL_NAME_OR_PATH", "openbmb/MiniCPM-o-2_6")}',
            # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#gpu-feature-list
@@ -42,10 +39,9 @@ class ContainerRuntimeConfig:
            "fastapi_bot_server,"
            "livekit,livekit-api,daily,agora,"
            "silero_vad_analyzer,"
            "sense_voice_asr,deepgram_asr_processor,"
            "llm_transformers_manual_vision_voice_minicpmo,"
            "queue"
            "]~=0.0.8.12",
            "]~=0.0.9.post10",
            "huggingface_hub[hf_transfer]==0.26.0",
            "wget",
        ],
@@ -170,8 +166,20 @@ def setup(self):

    @modal.enter()
    def enter(self):
        print("enter done")
        # volume.reload()
        # runs once when the container starts
        import subprocess
        import torch

        subprocess.run("nvidia-smi --version", shell=True)
        gpu_prop = None
        if torch.cuda.is_available():
            gpu_prop = torch.cuda.get_device_properties("cuda:0")
            print(gpu_prop)
            # use spawn so CUDA can be initialized safely in worker processes
            torch.multiprocessing.set_start_method("spawn", force=True)
        else:
            print("CUDA is not available.")

        # TODO: load the model at startup; for now the API loads it per bot config

    @modal.asgi_app()
    def app(self):
86 changes: 86 additions & 0 deletions deploy/modal/src/fastapi_webrtc_qwen2_5omni_vision_voice_bot_serve.py
@@ -0,0 +1,86 @@
import modal
import os

achatbot_version = os.getenv("ACHATBOT_VERSION", "0.0.9.post10")
qwen2_5omni_img = (
    # https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags
    modal.Image.from_registry(
        "nvidia/cuda:12.6.1-cudnn-devel-ubuntu22.04",
        add_python="3.10",
    )
    .apt_install("git", "git-lfs", "ffmpeg", "clang", "cmake")
    .pip_install("wheel")
    .pip_install(
        [
            "achatbot["
            "fastapi_bot_server,"
            "livekit,livekit-api,daily,agora,"
            "silero_vad_analyzer,asr_processor,"
            "llm_transformers_manual_vision_voice_qwen,"
            "queue"
            f"]=={achatbot_version}",
        ],
        extra_index_url=os.getenv("EXTRA_INDEX_URL", "https://pypi.org/simple/"),
    )
    .run_commands(
        "pip install git+https://github.com/huggingface/[email protected]"
    )
    .pip_install("flash-attn", extra_options="--no-build-isolation")
    .env(
        {
            "ACHATBOT_PKG": "1",
            "LOG_LEVEL": os.getenv("LOG_LEVEL", "info"),
            "LLM_MODEL_NAME_OR_PATH": f'/root/.achatbot/models/{os.getenv("LLM_MODEL_NAME_OR_PATH", "Qwen/Qwen2.5-Omni-7B")}',
            # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#gpu-feature-list
        }
    )
)


# ----------------------- app -------------------------------
app = modal.App("fastapi_webrtc_qwen2_5omni_bot")

HF_MODEL_DIR = "/root/.achatbot/models"
hf_model_vol = modal.Volume.from_name("models", create_if_missing=True)
ASSETS_DIR = "/root/.achatbot/assets"
assets_dir = modal.Volume.from_name("assets", create_if_missing=True)


# Modal containers default to 128 MiB of memory and 0.125 CPU cores
@app.cls(
    image=qwen2_5omni_img,
    gpu=os.getenv("IMAGE_GPU", None),
    secrets=[modal.Secret.from_name("achatbot")],
    volumes={
        HF_MODEL_DIR: hf_model_vol,
        ASSETS_DIR: assets_dir,
    },
    cpu=2.0,
    timeout=1200,  # default 300s
    scaledown_window=1200,
    max_containers=1,
    allow_concurrent_inputs=int(os.getenv("IMAGE_CONCURRENT_CN", "1")),
)
class Srv:
    @modal.enter()
    def enter(self):
        # runs once when the container starts
        import subprocess
        import torch

        subprocess.run("nvidia-smi --version", shell=True)
        gpu_prop = None
        if torch.cuda.is_available():
            gpu_prop = torch.cuda.get_device_properties("cuda:0")
            print(gpu_prop)
            # use spawn so CUDA can be initialized safely in worker processes
            torch.multiprocessing.set_start_method("spawn", force=True)
        else:
            print("CUDA is not available.")

        # TODO: load the model at startup; for now the API loads it per bot config

    @modal.asgi_app()
    def app(self):
        from achatbot.cmd.http.server.fastapi_daily_bot_serve import app as fastapi_app

        return fastapi_app
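
Once this file is served with `modal serve`, it exposes the same FastAPI routes as the other bot servers. A minimal client call might look like the sketch below; the base URL placeholder follows Modal's dev-serve naming from the README, and the payload is abbreviated (the full `omni_llm` config is in the curl examples above):

```python
# Hypothetical minimal client for the dev-served app; replace BASE_URL with
# your own Modal dev-serve URL, and fill in "config" per the README examples.
import requests

BASE_URL = "https://<your-workspace>--fastapi-webrtc-qwen2-5omni-bot-srv-app-dev.modal.run"

payload = {
    "chat_bot_name": "LivekitQwen2_5OmniVisionVoiceBot",
    "room_name": "chat-room",
    "room_manager": {
        "tag": "livekit_room",
        "args": {"bot_name": "LivekitQwen2_5OmniVisionVoiceBot"},
    },
    "services": {
        "pipeline": "achatbot",
        "vad": "silero",
        "omni_llm": "llm_transformers_manual_qwen2_5omni_vision_voice",
    },
    "config": {},  # abbreviated; see the full omni_llm config in deploy/modal/README.md
    "config_list": [],
}

# the path mirrors the curl examples: /bot_join/{room_name}/{bot_name}
resp = requests.post(
    f"{BASE_URL}/bot_join/chat-room/LivekitQwen2_5OmniVisionVoiceBot",
    json=payload,
)
print(resp.status_code, resp.text)
```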
19 changes: 17 additions & 2 deletions deploy/modal/src/fastapi_webrtc_vision_bot_serve.py
@@ -1,7 +1,7 @@
import modal
import os

achatbot_version = os.getenv("ACHATBOT_VERSION", "0.0.9.post8")
achatbot_version = os.getenv("ACHATBOT_VERSION", "0.0.9.post10")

vision_bot_img = (
    # https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags
@@ -10,6 +10,7 @@
        add_python="3.10",
    )
    .apt_install("git", "git-lfs", "ffmpeg", "cmake")
    .pip_install("wheel")
    .pip_install(
        [
            "achatbot["
@@ -25,7 +26,6 @@
        ],
        extra_index_url=os.getenv("EXTRA_INDEX_URL", "https://pypi.org/simple/"),
    )
    .pip_install("wheel")
    .pip_install("flash-attn", extra_options="--no-build-isolation")
    .env(
        {
@@ -123,6 +123,21 @@ class ContainerRuntimeConfig:
                }
            )
        ),
        "qwen2_5omni": (
            vision_bot_img.pip_install(
                [
                    f"achatbot[llm_transformers_manual_vision_voice_qwen]=={achatbot_version}",
                ],
                extra_index_url=os.getenv("EXTRA_INDEX_URL", "https://pypi.org/simple/"),
            )
            .run_commands("pip install git+https://github.com/huggingface/[email protected]")
            .env(
                {
                    "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True",
                    "LLM_MODEL_NAME_OR_PATH": f'/root/.achatbot/models/{os.getenv("LLM_MODEL_NAME_OR_PATH", "Qwen/Qwen2.5-Omni-7B")}',
                }
            )
        ),
    }

    @staticmethod
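
The `qwen2_5omni` entry above extends the per-IMAGE_NAME image map inside `ContainerRuntimeConfig`; the rest of the class is collapsed in this diff. A hypothetical reconstruction of the selection logic it implies (the function name and signature are assumptions, not copied from the file):

```python
# Hypothetical reconstruction of the IMAGE_NAME dispatch; the actual
# ContainerRuntimeConfig methods may differ outside the visible hunks.
import os

import modal


def get_image(images: dict) -> modal.Image:
    """Pick a prebuilt image variant via the IMAGE_NAME env var."""
    name = os.getenv("IMAGE_NAME", "default")
    if name not in images:
        raise ValueError(f"unsupported IMAGE_NAME: {name}; choose one of {list(images)}")
    return images[name]
```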