You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
2025-03-09 05:46:53,503 xinference.api.restful_api 1 ERROR [address=0.0.0.0:46697, pid=578] '<class 'sglang.srt.configs.qwen2_5_vl_config.Qwen2_5_VLConfig'>' is already used by a Transformers model.
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/xinference/api/restful_api.py", line 1002, in launch_model
model_uid = await (await self._get_supervisor_ref()).launch_builtin_model(
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 667, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 370, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 384, in __on_receive__
return await super().__on_receive__(message) # type: ignore
File "xoscar/core.pyx", line 558, in __on_receive__
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.__on_receive__
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.__on_receive__
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.__on_receive__
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 1190, in launch_builtin_model
await _launch_model()
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 1125, in _launch_model
subpool_address = await _launch_one_model(
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 1083, in _launch_one_model
subpool_address = await worker_ref.launch_builtin_model(
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 667, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 370, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 384, in __on_receive__
return await super().__on_receive__(message) # type: ignore
File "xoscar/core.pyx", line 558, in __on_receive__
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.__on_receive__
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.__on_receive__
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.__on_receive__
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/utils.py", line 93, in wrapped
ret = await func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/xinference/core/worker.py", line 926, in launch_builtin_model
await model_ref.load()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 667, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 370, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 384, in __on_receive__
return await super().__on_receive__(message) # type: ignore
File "xoscar/core.pyx", line 558, in __on_receive__
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.__on_receive__
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.__on_receive__
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.__on_receive__
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 464, in load
self._model.load()
File "/usr/local/lib/python3.10/dist-packages/xinference/model/llm/sglang/core.py", line 215, in load
self._engine = sgl.Runtime(
File "/usr/local/lib/python3.10/dist-packages/sglang/api.py", line 38, in Runtime
return Runtime(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/sglang/lang/backend/runtime_endpoint.py", line 354, in __init__
from sglang.srt.entrypoints.http_server import launch_server
File "/usr/local/lib/python3.10/dist-packages/sglang/srt/entrypoints/http_server.py", line 44, in <module>
from sglang.srt.entrypoints.engine import _launch_subprocesses
File "/usr/local/lib/python3.10/dist-packages/sglang/srt/entrypoints/engine.py", line 36, in <module>
from sglang.srt.managers.data_parallel_controller import (
File "/usr/local/lib/python3.10/dist-packages/sglang/srt/managers/data_parallel_controller.py", line 27, in <module>
from sglang.srt.managers.io_struct import (
File "/usr/local/lib/python3.10/dist-packages/sglang/srt/managers/io_struct.py", line 25, in <module>
from sglang.srt.managers.schedule_batch import BaseFinishReason
File "/usr/local/lib/python3.10/dist-packages/sglang/srt/managers/schedule_batch.py", line 43, in <module>
from sglang.srt.configs.model_config import ModelConfig
File "/usr/local/lib/python3.10/dist-packages/sglang/srt/configs/__init__.py", line 4, in <module>
from sglang.srt.configs.qwen2_5_vl_config import (
File "/usr/local/lib/python3.10/dist-packages/sglang/srt/configs/qwen2_5_vl_config.py", line 1005, in <module>
AutoImageProcessor.register(Qwen2_5_VLConfig, None, Qwen2_5_VLImageProcessor, None)
File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/image_processing_auto.py", line 628, in register
IMAGE_PROCESSOR_MAPPING.register(
File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", line 833, in register
raise ValueError(f"'{key}' is already used by a Transformers model.")
ValueError: [address=0.0.0.0:46697, pid=578] '<class 'sglang.srt.configs.qwen2_5_vl_config.Qwen2_5_VLConfig'>' is already used by a Transformers model.
Expected behavior / 期待表现
成功启动模型
The text was updated successfully, but these errors were encountered:
System Info / 系統信息
CUDA Version: 12.8
Driver Version: 570.86.15
docker image: xprobe/xinference:nightly-main 54afc1a4f84f
docker version: Docker CE 27.5.1
OS: ubuntu focal
Running Xinference with Docker? / 是否使用 Docker 运行 Xinference?
Version info / 版本信息
xinference, version 1.3.0.post2+21.g220398d
The command used to start Xinference / 用以启动 xinference 的命令
Reproduction / 复现过程
docker exec -i xinference xinference launch --model-name llama-3.3-instruct --model-type LLM --model-engine SGLang --model-format awq --size-in-billions 70 --quantization Int4 --n-gpu auto --replica 1 --n-worker 1 --gpu-idx 0
模型启动时报告如下错误:
Expected behavior / 期待表现
成功启动模型
The text was updated successfully, but these errors were encountered: