Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions benchmarks/qwen3-tts/vllm_omni/configs/qwen3_tts_bs1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,6 @@ stage_args:

runtime:
enabled: true
defaults:
window_size: -1
max_inflight: 1

connectors:
connector_of_shared_memory:
name: SharedMemoryConnector
Expand All @@ -90,4 +86,3 @@ runtime:
edges:
- from: 0
to: 1
window_size: -1
5 changes: 0 additions & 5 deletions benchmarks/qwen3-tts/vllm_omni/configs/qwen3_tts_bs16.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,6 @@ stage_args:

runtime:
enabled: true
defaults:
window_size: -1
max_inflight: 16

connectors:
connector_of_shared_memory:
name: SharedMemoryConnector
Expand All @@ -91,4 +87,3 @@ runtime:
edges:
- from: 0
to: 1
window_size: -1
5 changes: 0 additions & 5 deletions benchmarks/qwen3-tts/vllm_omni/configs/qwen3_tts_bs4.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,6 @@ stage_args:

runtime:
enabled: true
defaults:
window_size: -1
max_inflight: 4

connectors:
connector_of_shared_memory:
name: SharedMemoryConnector
Expand All @@ -91,4 +87,3 @@ runtime:
edges:
- from: 0
to: 1
window_size: -1
6 changes: 0 additions & 6 deletions docs/configuration/pd_disaggregation.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,19 +145,13 @@ Compared with the default Qwen3-Omni config:
```yaml
runtime:
enabled: true
defaults:
window_size: -1
max_inflight: 1
edges:
- from: 0
to: 1
window_size: -1
- from: 1
to: 2
window_size: -1
- from: 2
to: 3
window_size: -1
```

## 4. Launch with your custom config
Expand Down
6 changes: 1 addition & 5 deletions docs/configuration/stage_configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,16 +114,12 @@ stage_args:
# Top-level runtime config (concise): default windows and stage edges
runtime:
enabled: true
defaults:
window_size: -1 # Simplified: trigger downstream only after full upstream completion
max_inflight: 1 # Simplified: process serially within each stage

edges:
- from: 0 # thinker → talker: trigger only after receiving full input (-1)
to: 1
window_size: -1
- from: 1 # talker → code2wav: trigger only after receiving full input (-1)
to: 2
window_size: -1

```

Expand Down
5 changes: 0 additions & 5 deletions docs/configuration/stage_configs/qwen2_5_omni.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,8 @@ stage_args:
# Top-level runtime config (concise): default windows and stage edges
runtime:
enabled: true
defaults:
window_size: -1 # Simplified: trigger downstream only after full upstream completion
max_inflight: 1 # Simplified: process serially within each stage
edges:
- from: 0 # thinker → talker: trigger only after receiving full input (-1)
to: 1
window_size: -1
- from: 1 # talker → code2wav: trigger only after receiving full input (-1)
to: 2
window_size: -1
2 changes: 0 additions & 2 deletions docs/user_guide/examples/offline_inference/bagel.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,6 @@ Example configuration for TP=2 on GPUs 0 and 1:

| Parameter | Value | Description |
| :-------------------- | :------ | :------------------------------- |
| `window_size` | `-1` | Window size (-1 means unlimited) |
| `max_inflight` | `1` | Maximum inflight requests |
| `shm_threshold_bytes` | `65536` | Shared memory threshold (64KB) |

## Using Mooncake Connector
Expand Down
2 changes: 0 additions & 2 deletions examples/offline_inference/bagel/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,6 @@ Example configuration for TP=2 on GPUs 0 and 1:

| Parameter | Value | Description |
| :-------------------- | :------ | :------------------------------- |
| `window_size` | `-1` | Window size (-1 means unlimited) |
| `max_inflight` | `1` | Maximum inflight requests |
| `shm_threshold_bytes` | `65536` | Shared memory threshold (64KB) |

## Using Mooncake Connector
Expand Down
5 changes: 0 additions & 5 deletions tests/dfx/perf/stage_configs/qwen3_tts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,6 @@ stage_args:

runtime:
enabled: true
defaults:
window_size: -1
max_inflight: 4

connectors:
connector_of_shared_memory:
name: SharedMemoryConnector
Expand All @@ -93,4 +89,3 @@ runtime:
edges:
- from: 0
to: 1
window_size: -1
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,6 @@ stage_args:
# Top-level runtime config with Mooncake connector
runtime:
enabled: true
defaults:
window_size: -1
max_inflight: 1
connectors:
mooncake_connector:
name: MooncakeConnector
Expand All @@ -80,4 +77,3 @@ runtime:
edges:
- from: 0
to: 1
window_size: -1
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,6 @@ stage_args:
# Runtime edges
runtime:
enabled: true
defaults:
window_size: -1
max_inflight: 1

# Distributed connectors configuration (optional)
# More connectors will be supported in the future.
connectors:
Expand All @@ -78,4 +74,3 @@ runtime:
edges:
- from: 0
to: 1
window_size: -1
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,8 @@ stage_args:
# Top-level runtime config (concise): default windows and stage edges
runtime:
enabled: true
defaults:
window_size: -1 # Simplified: trigger downstream only after full upstream completion
max_inflight: 1 # Simplified: process serially within each stage
edges:
- from: 0 # thinker → talker: trigger only after receiving full input (-1)
to: 1
window_size: -1
- from: 1 # talker → code2wav: trigger only after receiving full input (-1)
to: 2
window_size: -1
5 changes: 0 additions & 5 deletions tests/e2e/stage_configs/dynin_omni_ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,8 @@ stage_args:

runtime:
enabled: true
defaults:
window_size: -1
max_inflight: 1
edges:
- from: 0
to: 1
window_size: -1
- from: 1
to: 2
window_size: -1
5 changes: 0 additions & 5 deletions tests/e2e/stage_configs/qwen2_5_omni_ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,8 @@ stage_args:
# Top-level runtime config (concise): default windows and stage edges
runtime:
enabled: true
defaults:
window_size: -1 # Simplified: trigger downstream only after full upstream completion
max_inflight: 1 # Simplified: process serially within each stage
edges:
- from: 0 # thinker → talker: trigger only after receiving full input (-1)
to: 1
window_size: -1
- from: 1 # talker → code2wav: trigger only after receiving full input (-1)
to: 2
window_size: -1
5 changes: 0 additions & 5 deletions tests/e2e/stage_configs/rocm/qwen2_5_omni_ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,8 @@ stage_args:
# Top-level runtime config (concise): default windows and stage edges
runtime:
enabled: true
defaults:
window_size: -1 # Simplified: trigger downstream only after full upstream completion
max_inflight: 1 # Simplified: process serially within each stage
edges:
- from: 0 # thinker → talker: trigger only after receiving full input (-1)
to: 1
window_size: -1
- from: 1 # talker → code2wav: trigger only after receiving full input (-1)
to: 2
window_size: -1
6 changes: 0 additions & 6 deletions tests/e2e/stage_configs/xpu/qwen2_5_omni_ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,8 @@ stage_args:
# Top-level runtime config (concise): default windows and stage edges
runtime:
enabled: true
defaults:
window_size: -1 # Simplified: trigger downstream only after full upstream completion
max_inflight: 1 # Simplified: process serially within each stage

edges:
- from: 0 # thinker → talker: trigger only after receiving full input (-1)
to: 1
window_size: -1
- from: 1 # talker → code2wav: trigger only after receiving full input (-1)
to: 2
window_size: -1
1 change: 0 additions & 1 deletion vllm_omni/model_executor/models/qwen3_tts/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,3 @@ connectors:
edges:
- from: 0
to: 1
window_size: -1
5 changes: 0 additions & 5 deletions vllm_omni/model_executor/stage_configs/bagel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,6 @@ stage_args:
# Runtime edges
runtime:
enabled: true
defaults:
Comment thread
NickCao marked this conversation as resolved.
window_size: -1
max_inflight: 1

# Distributed connectors configuration (optional)
# More connectors will be supported in the future.
connectors:
Expand Down Expand Up @@ -104,4 +100,3 @@ runtime:
edges:
- from: 0
to: 1
window_size: -1
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,6 @@ stage_args:
# Runtime edges
runtime:
enabled: true
defaults:
window_size: -1
max_inflight: 1

# Distributed connectors configuration (optional)
# More connectors will be supported in the future.
connectors:
Expand Down Expand Up @@ -104,4 +100,3 @@ runtime:
edges:
- from: 0
to: 1
window_size: -1
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,3 @@ stage_args:
# Runtime edges
runtime:
enabled: true
defaults:
window_size: -1
max_inflight: 1
4 changes: 0 additions & 4 deletions vllm_omni/model_executor/stage_configs/bagel_think.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,6 @@ stage_args:
# Runtime edges
runtime:
enabled: true
defaults:
window_size: -1
max_inflight: 1

connectors:
shared_memory_connector:
Expand All @@ -78,4 +75,3 @@ runtime:
edges:
- from: 0
to: 1
window_size: -1
4 changes: 0 additions & 4 deletions vllm_omni/model_executor/stage_configs/bagel_usp2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,6 @@ stage_args:

runtime:
enabled: true
defaults:
window_size: -1
max_inflight: 1
connectors:
shared_memory_connector:
name: SharedMemoryConnector
Expand All @@ -73,4 +70,3 @@ runtime:
edges:
- from: 0
to: 1
window_size: -1
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,6 @@ stage_args:

runtime:
enabled: true
defaults:
window_size: -1
max_inflight: 1

connectors:
connector_of_shared_memory:
Expand All @@ -82,4 +79,3 @@ runtime:
edges:
- from: 0
to: 1
window_size: -1
5 changes: 0 additions & 5 deletions vllm_omni/model_executor/stage_configs/dynin_omni.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,9 @@ stage_args:
# Top-level runtime config (concise): default windows and stage edges
runtime:
enabled: true
defaults:
window_size: -1 # Simplified: trigger downstream only after full upstream completion
max_inflight: 1 # Simplified: process serially within each stage

edges:
- from: 0
to: 1
window_size: -1
- from: 1
to: 2
window_size: -1
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,6 @@ stage_args:
# Top-level runtime config (concise): default windows and stage edges
runtime:
enabled: true
defaults:
window_size: -1 # Simplified: trigger downstream only after full upstream completion
max_inflight: 1 # Simplified: process serially within each stage
####
# same as Qwen2.5_omni version
# Distributed connectors configuration (optional)
Expand Down Expand Up @@ -108,7 +105,5 @@ runtime:
edges:
- from: 0
to: 1
window_size: -1
- from: 1
to: 2
window_size: -1
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,6 @@ stage_args:

runtime:
enabled: true
defaults:
window_size: -1
max_inflight: 16

connectors:
connector_of_shared_memory:
name: SharedMemoryConnector
Expand All @@ -93,4 +89,3 @@ runtime:
edges:
- from: 0
to: 1
window_size: -1
5 changes: 0 additions & 5 deletions vllm_omni/model_executor/stage_configs/glm_image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,6 @@ stage_args:
# Top-level runtime config
runtime:
enabled: true
defaults:
window_size: -1 # Trigger downstream only after full upstream completion
max_inflight: 1 # Process serially within each stage

edges:
- from: 0 # AR → Diffusion: trigger after AR completes
to: 1
window_size: -1
Loading
Loading