From 05c7b10b1ac57802be096b30574f09826a3efdbd Mon Sep 17 00:00:00 2001 From: RageLtMan Date: Mon, 2 Mar 2026 15:23:01 -0500 Subject: [PATCH 1/2] Implement Constrained Generation via LLGuidance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements the full llguidance integration enabling grammar-constrained inference for structured outputs, tool calling, and custom constraints. Architecture: - TopLevelGrammar serialized via rmp_serde across RPC boundaries - Grammar flows: Server → params.grammar → Runner → GuidanceState → Matcher - Inline correction via logits masking during sampling - Post-process correction via rollback on validation failure Key components: - params.grammar field in SamplingParams for RPC serialization GuidanceState - GuidanceState::new() with Matcher state management - GuidanceState::reset() for proper state cleanup - Rollback counter (MAX_ROLLBACK_ATTEMPTS=3) preventing infinite loops - guidance_failed/guidance_mismatch sets cleared on rollback - Vocab size validation in build_llg_factory() - Lark grammar generation from tools via build_tool_call_lark_grammar() CLI flags: - --enable-tool-grammar: Auto-build LLG grammar from MCP tools - --allow-constraint-api: Accept client-provided structured_outputs/response_format --- Cargo.toml | 4 +- ReadMe-CN.md | 26 +- ReadMe.md | 42 +- docs/goose.md | 16 +- docs/llguidance-integration.md | 2175 ++++++ example/special-tokens-extraction/Cargo.lock | 5851 +++++++++++++++++ example/special-tokens-extraction/Cargo.toml | 7 + example/special-tokens-extraction/README.md | 195 + example/special-tokens-extraction/src/main.rs | 82 + src/api.rs | 2 + src/core/block_manager.rs | 73 + src/core/engine.rs | 58 +- src/core/mod.rs | 36 + src/core/prefix_cache.rs | 30 + src/core/runner.rs | 284 +- src/core/scheduler.rs | 159 +- src/core/sequence.rs | 31 + src/main.rs | 62 +- src/py/mod.rs | 37 +- src/runner/mod.rs | 5 +- src/runner/runner.rs | 16 +- src/server/mod.rs | 
325 +- src/server/parser.rs | 14 +- src/server/server.rs | 212 +- src/tools/helpers.rs | 10 + src/tools/mod.rs | 59 +- src/tools/parser.rs | 351 +- src/tools/schema.rs | 1312 +++- src/transfer/comm.rs | 10 +- src/utils/chat_template.rs | 13 + src/utils/command.rs | 6 +- src/utils/config.rs | 139 +- src/utils/guidance.rs | 1305 +++- src/utils/mod.rs | 5 +- src/utils/special_tokens.rs | 362 + 35 files changed, 13091 insertions(+), 223 deletions(-) create mode 100644 docs/llguidance-integration.md create mode 100644 example/special-tokens-extraction/Cargo.lock create mode 100644 example/special-tokens-extraction/Cargo.toml create mode 100644 example/special-tokens-extraction/README.md create mode 100644 example/special-tokens-extraction/src/main.rs create mode 100644 src/utils/special_tokens.rs diff --git a/Cargo.toml b/Cargo.toml index adfda6b9..aa2c7ccb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,8 @@ itertools = "0.13.0" akin = "0.4.0" indicatif = "0.17.11" serde_json = "1.0.108" -llguidance = "0.6" +llguidance = { version = "1.6", default-features = false, features = ["lark"] } +toktrie_hf_tokenizers = "1.6" toktrie = "1.4" half = { version = "2.5.0", features = ["num-traits", "use-intrinsics", "rand_distr"] } tokio = { version = "1.38.0", features = ["sync"] } @@ -35,6 +36,7 @@ interprocess = "2.2.2" serde-big-array = "0.5.1" bincode = { version = "1.3.1" } twox-hash = "2.1.1" +rmp-serde = "1.3.1" rand = "0.9.0" rayon="1.10.0" clap = { version = "4.4.7", features = ["derive"] } diff --git a/ReadMe-CN.md b/ReadMe-CN.md index cdb1a046..57acdde4 100644 --- a/ReadMe-CN.md +++ b/ReadMe-CN.md @@ -319,8 +319,30 @@ cargo install --features metal --- -## 🔌 MCP集成 (工具调用) +## 🔌 LLGuidance 支持(结构化输出与约束) + +vLLM.rs 现在支持通过 llguidance 库实现结构化输出和约束生成: + +- **工具调用优化**:使用 `--enable-tool-grammar` 启用工具调用语法,强制模型输出符合工具参数schema的JSON结构 +- **自定义约束**:使用 `--allow-constraint-api` 允许客户端通过 structured_outputs 或 response_format 提交 Lark/Regex/JSON Schema 约束 +- 
**正则表达式约束**:强制输出符合特定格式(如电话号码、日期等) +- **JSON Schema 约束**:通过 OpenAI 兼容的 response_format 或 structured_outputs 提交自定义约束 + +**使用示例:** +```bash +# 启用工具调用语法(自动从 MCP 工具构建 LLG 语法) +vllm-rs --m Qwen/Qwen3-30B-A3B-Instruct --enable-tool-grammar --ui-server + +# 启用客户端约束API(允许OpenAI风格的structured_outputs/response_format) +vllm-rs --m Qwen/Qwen3-30B-A3B-Instruct --allow-constraint-api --ui-server +``` + +查看 [**结构化输出文档 →**](docs/llguidance-integration.md) + +--- + +## 🔌 MCP集成 (工具调用) 通过Model Context Protocol让LLM调用外部工具。 ```bash @@ -455,6 +477,8 @@ pip install target/wheels/vllm_rs-*-cp38-abi3-*.whl --force-reinstall | `--kv-fraction` | 用于控制KVCache使用量 (模型加载后剩余可用GPU显存的百分比) | | `--prefix-cache` | 启用前缀缓存,用于多轮对话 | | `--prefix-cache-max-tokens` | 限制前缀缓存大小(按 block size 向下取整) | +| `--allow-constraint-api` | 允许通过HTTP API提交客户端约束(默认:false) | +| `--enable-tool-grammar` | 自动从工具构建LLG语法(默认:false) | ### MCP配置参数 diff --git a/ReadMe.md b/ReadMe.md index 68a3180e..f74f12be 100644 --- a/ReadMe.md +++ b/ReadMe.md @@ -95,9 +95,9 @@ All models support hardware FP8 KV-cache acceleration (requires SM90+ and disabl ## 📘 Usage in Python ### 📦 Install with pip -- 💡 **CUDA compute capability < 8.0** (e.g., V100) requires a **manual build** +- 💡 **CUDA compute capability < 8.0** (e.g., V100) requires a **manual build** (no `flash-attn` support; alternatively use **Rust mode**). -- 💡 The **prebuilt wheel** is built with the `flash-context` feature enabled. +- 💡 The **prebuilt wheel** is built with the `flash-context` feature enabled. To use **FP8 KV Cache**, you must **build manually** (remove the `flash-context` build flag). @@ -284,7 +284,7 @@ Use `--i` to enable interactive mode 🤖, `--ui-server` or `--server` to enable # Metal/MacOS vllm-rs --m Qwen/Qwen3-4B-GGUF --f Qwen3-4B-Q4_K_M.gguf --ui-server --prefix-cache ``` - +
Multi-GPU + Unquantized Model @@ -332,6 +332,28 @@ vllm-rs --m Qwen/Qwen3-4B-Instruct-2507-FP8 --ui-server --prefix-cache --- +## 🔌 LLGuidance Support (Structured Outputs & Constraints) + +vLLM.rs now supports structured output and constraint-based generation via llguidance: + +- **Tool Call Optimization**: Use `--enable-tool-grammar` to auto-build LLG grammar from tools, forcing model to output JSON matching tool parameter schemas +- **Custom Constraints**: Use `--allow-constraint-api` to allow clients to submit Lark/Regex/JSON Schema constraints via OpenAI-compatible structured_outputs/response_format +- **Regex Constraints**: Enforce output formats like phone numbers (`^number\s\d{3}-\d{3}-\d{4}$`) +- **JSON Schema Constraints**: Enforce structured output via response_format or structured_outputs + +**Usage Examples:** +```bash +# Enable tool grammar (auto-builds LLG grammar from MCP tools) +vllm-rs --m Qwen/Qwen3-30B-A3B-Instruct --enable-tool-grammar --ui-server + +# Enable client constraints API (accepts structured_outputs/response_format) +vllm-rs --m Qwen/Qwen3-30B-A3B-Instruct --allow-constraint-api --ui-server +``` + +See [**Structured Outputs Documentation →**](docs/llguidance-integration.md) + +--- + ## 🔌 MCP Integration (Tool Calling) Enable LLMs to call external tools via Model Context Protocol. @@ -434,7 +456,7 @@ PD Disaggregation separates prefill (prompt processing) and decode (token genera ## 📽️ Demo Video -Watch it in action 🎉 +Watch it in action 🎉 @@ -462,19 +484,19 @@ pip install maturin[patchelf] # For Linux/Windows 2. 
**Build the Python package** ```bash -# Naive CUDA (single GPU only) +# Naive CUDA (single GPU only) maturin build --release --features cuda,python # Naive CUDA (+CUDA Graph, experimental) ./build.sh --release --features cuda,graph,python -# CUDA (with prefix-cache and FP8 KV Cache, no Flash Attention, compatible with V100) +# CUDA (with prefix-cache and FP8 KV Cache, no Flash Attention, compatible with V100) ./build.sh --release --features cuda,nccl,python -# CUDA (+Flash Attention, only used in prefill stage) +# CUDA (+Flash Attention, only used in prefill stage) ./build.sh --release --features cuda,nccl,flash-attn,python -# CUDA (+cutlass (sm90+), +Flash Attention for decoding, +high prefill throughput, long time to build) +# CUDA (+cutlass (sm90+), +Flash Attention for decoding, +high prefill throughput, long time to build) ./build.sh --release --features cuda,nccl,flash-attn,flash-context,cutlass,python # macOS (Metal, single GPU only, with prefix-cache and FP8 kvcache) @@ -518,6 +540,8 @@ pip install target/wheels/vllm_rs-*-cp38-abi3-*.whl --force-reinstall | `--kv-fraction` | control kvcache usage (percentage of remaining gpu memory after model loading) | | `--prefix-cache` | Enable prefix caching for multi-turn conversations | | `--prefix-cache-max-tokens` | Cap prefix cache size in tokens (rounded down to block size) | +| `--allow-constraint-api` | Allow client-submitted constraints via HTTP API (default: false) | +| `--enable-tool-grammar` | Automatically build LLG grammar from tools (default: false) | ### MCP Configuration @@ -563,7 +587,7 @@ pip install target/wheels/vllm_rs-*-cp38-abi3-*.whl --force-reinstall * [x] **Claude/Anthropic-compatible API Server** * [x] **Support CUDA 13** * [x] **Support FlashInfer backend** -* [ ] TentorRT-LLM +* [ ] TentorRT-LLM --- ## 📚 References diff --git a/docs/goose.md b/docs/goose.md index 9d3c76dd..70d6b178 100644 --- a/docs/goose.md +++ b/docs/goose.md @@ -17,35 +17,33 @@ python3 -m vllm_rs.server --m 
Qwen/Qwen3-30B-A3B-Instruct-2507 --d 0,1 --server ## 2) Configure Goose -### Download and install Goose: https://block.github.io/goose/docs/getting-started/installation/ - ```shell # For non-UI system, export GOOSE_DISABLE_KEYRING=1 ``` - Export empty API KEY ```shell export VLLM_API_KEY="empty" ``` +### Download and install Goose: https://block.github.io/goose/docs/getting-started/installation/ ### Configure goose with `Custom Providers` and API key `empty` ```shell goose configure -┌ goose-configure +┌ goose-configure │ ◇ What would you like to configure? -│ Custom Providers +│ Custom Providers │ ◇ What would you like to do? -│ Add A Custom Provider +│ Add A Custom Provider │ ◇ What type of API is this? -│ OpenAI Compatible +│ OpenAI Compatible │ ◇ What should we call this provider? │ vllm-rs @@ -60,10 +58,10 @@ goose configure │ default │ ◇ Does this provider support streaming responses? -│ Yes +│ Yes │ ◇ Does this provider require custom headers? -│ No +│ No │ └ Custom provider added: vllm-rs └ Configuration saved successfully to /root/.config/goose/config.yaml diff --git a/docs/llguidance-integration.md b/docs/llguidance-integration.md new file mode 100644 index 00000000..f3caea5b --- /dev/null +++ b/docs/llguidance-integration.md @@ -0,0 +1,2175 @@ +# LLGuidance Integration Documentation + +## Overview + +This document provides comprehensive documentation for the llguidance integration in vllm.rs, covering: + +1. **Architecture** - System design and component interactions +2. **Data Flow** - Complete request-to-response flow +3. **API Reference** - All public functions and their signatures +4. **Usage Examples** - Common patterns and use cases +5. **Mathematical Foundations** - How the grammar system works +6. **Rollback Mechanics** - State recovery and consistency +7. **Grammar Construction** - How grammars are composed and merged +8. **Grammar Rule Order** - Correct ordering of Lark grammar rules + +--- + +## 1. 
ARCHITECTURE + +### Component Overview + +```mermaid +sequenceDiagram + participant User + participant API + participant Pipeline + participant SpecialTokens + participant LLGFactory + participant Matcher + participant TokenParser + participant EarleyParser + participant Lexer + participant TokTrie + participant Sampler + participant LogitsProcessor + participant Model + + User->>API: Request with constraint (regex/json_schema/lark/llguidance) + + Note over User,API: Phase 1: Request Setup and Grammar Building + + API->>SpecialTokens: SpecialTokens::new(&tokenizer) + SpecialTokens-->>API: Return EOS, BOS, TOOL token IDs + API->>Pipeline: build_llg_factory(tokenizer) + Pipeline->>LLGFactory: toktrie_hf_tokenizers::ByteTokenizer::from_tokenizer(tokenizer) + LLGFactory->>TokTrie: Create token trie from tokenizer vocabulary + TokTrie-->>LLGFactory: Return TokEnv with trie + LLGFactory->>LLGFactory: ParserFactory::new_simple(&env) + LLGFactory-->>Pipeline: Return Arc + + Pipeline->>Pipeline: llg_grammar_from_constraint(&request.constraint) + Pipeline->>Matcher: constraint_from_llg_grammar(&factory, grm) + Matcher->>Matcher: factory.create_parser(grm) + Matcher->>TokenParser: Create with grammar_init + TokenParser->>EarleyParser: Build CGrammar from grammar + TokenParser->>Lexer: Build LexerSpec from grammar + Lexer->>TokTrie: Precompute large lexemes if needed + TokTrie-->>Lexer: Return optimized lexeme sets + + Note over User,Matcher: Phase 2: Prompt Processing (if needed) + + User->>API: Optional: process_prompt(prompt_tokens) + API->>TokenParser: process_prompt(prompt_tokens) + TokenParser->>TokenParser: tokenize_bytes_marker(&prompt_bytes) + TokenParser->>TokenParser: process_prompt() returns new prompt + + Note over User,Matcher: Phase 3: Inference Loop + + loop for each token generation + + Model->>Model: Forward pass on input tokens + Model-->>Pipeline: Return logits tensor + + Pipeline->>Sampler: sample_sequence(logits, seq, ...) 
+ + Note over Sampler: Two-stage sampling with llguidance + + Sampler->>LogitsProcessor: Apply llguidance constraint + + LogitsProcessor->>TokenParser: compute_mask() + TokenParser->>TokenParser: compute_mask_inner() + TokenParser->>EarleyParser: run_speculative("compute_mask") + EarleyParser->>EarleyParser: trie_started("compute_mask") + EarleyParser->>EarleyParser: compute_bias() + EarleyParser->>Lexer: compute_bias() with token_prefix + + Note over Lexer,TokTrie: Lexical Scope Analysis + + Lexer->>TokTrie: Walk token trie for allowed lexemes + TokTrie-->>Lexer: Return SimpleVob bit mask + + Lexer->>EarleyParser: Return mask to TokenParser + TokenParser->>TokenParser: cache mask for fast-forward + + TokenParser-->>LogitsProcessor: Return SimpleVob mask + + LogitsProcessor->>LogitsProcessor: Check if sampled token is allowed + LogitsProcessor->>Sampler: Apply logit biasing + + alt Token is allowed + Sampler->>Sampler: No biasing needed + else Token is not allowed + Sampler->>Sampler: Set invalid tokens to -f32::INFINITY + Sampler->>Sampler: Re-sample with biased logits + end + + Sampler->>TokenParser: consume_token(sampled_token) + TokenParser->>TokenParser: apply_token(sampled_token) + TokenParser->>TokenParser: llm_tokens.push(sampled_token) + TokenParser->>TokenParser: llm_bytes.extend(token_bytes) + TokenParser->>EarleyParser: parser.apply_token(token_bytes, token_id) + EarleyParser->>Lexer: advance lexer state + Lexer->>Lexer: Update lexer_stack with new state + Lexer->>EarleyParser: Return backtrack count + + alt Backtrack needed + EarleyParser->>EarleyParser: rollback(backtrack_bytes) + EarleyParser->>EarleyParser: Update llm_tokens and llm_bytes + end + + TokenParser->>TokenParser: check_stop() + TokenParser-->>Sampler: Return CommitResult + + Note over Sampler: Phase 4: Fast-Forward (if enabled) + + Sampler->>TokenParser: compute_ff_tokens() + TokenParser->>TokenParser: ff_tokens() + TokenParser->>TokTrie: Tokenize forced bytes + TokTrie-->>TokenParser: 
Return fast-forward tokens + + alt Fast-forward tokens available + TokenParser->>TokenParser: consume_ff_tokens() + loop for each ff_token + TokenParser->>TokenParser: consume_token(ff_token) + TokenParser->>TokenParser: llm_tokens.push(ff_token) + TokenParser->>TokenParser: llm_bytes.extend(ff_token_bytes) + end + end + + Note over Sampler: Phase 5: Speculative Decoding (if enabled) + + Model->>Model: Draft model forward pass + Model-->>Pipeline: Return draft logits + + Pipeline->>Sampler: sample_target_sequence_speculative() + Sampler->>TokenParser: rollback(n_toks) + TokenParser->>EarleyParser: parser.rollback(bytes_to_drop) + EarleyParser->>Lexer: pop lexer states + Lexer-->>TokenParser: Return rollback result + + Sampler->>Sampler: Sample draft tokens + Sampler->>TokenParser: validate_tokens(draft_tokens) + TokenParser->>TokenParser: consume_token(draft_token) + + alt Draft token accepted + TokenParser->>TokenParser: Continue with next draft + else Draft token rejected + TokenParser->>TokenParser: Accept partial draft + TokenParser->>TokenParser: Rollback to last valid state + end + + end + + Note over User,Matcher: Phase 6: Token Geometry and Binary Data State + + TokTrie->>TokTrie: Token encoding (8:24 bit split) + TokTrie->>TokTrie: node.bits = (token_id << 8) | byte + TokTrie->>TokTrie: node.bits2 = (subtree_size << 10) | num_parents + + TokTrie->>SimpleVob: Bit mask storage + SimpleVob->>SimpleVob: data: Vec (32 tokens per word) + SimpleVob->>SimpleVob: allow_token(tok): data[tok>>5] |= 1 << (tok&31) + + Note over User,Matcher: Phase 7: Rollback and Verification + + TokenParser->>TokenParser: validate_tokens(tokens) + TokenParser->>EarleyParser: validate_tokens_raw(tokens) + EarleyParser->>Lexer: Check if tokens match current lexer state + Lexer-->>TokenParser: Return number of valid tokens + + TokenParser->>TokenParser: rollback(n_tokens) + TokenParser->>EarleyParser: parser.rollback(bytes_to_drop) + EarleyParser->>Lexer: pop lexer states + 
TokenParser->>TokenParser: llm_tokens.truncate(new_len) + TokenParser->>TokenParser: llm_bytes.truncate(new_len) + + Note over User,Matcher: Phase 8: Response Generation + + Pipeline->>API: Return completion with tokens + API->>User: Stream or return final response + +``` + +### Key Data Structures + +#### `GuidanceState` Struct +**Location**: [`src/utils/guidance.rs:515-523`](src/utils/guidance.rs:515-523) + +`GuidanceState` wraps the llguidance `Matcher` and tracks the committed tokens and byte position needed for rollback; the `TopLevelGrammar` it is created from is provided by llguidance and represents a complete grammar specification. + +```rust +pub struct GuidanceState { + matcher: Matcher, // llguidance Matcher instance + llm_tokens: Vec, // Track committed tokens for rollback + llm_bytes: usize, // Track byte position for rollback + slicer_cache: SlicerCache, // Cache for precomputed mask slices +} +``` + +#### `SamplingParams` Struct +**Location**: [`src/utils/config.rs`](src/utils/config.rs) + +```rust +pub struct SamplingParams { + pub temperature: Option, + pub max_tokens: Option, + pub ignore_eos: bool, + pub top_k: Option, + pub top_p: Option, + pub session_id: Option, + pub frequency_penalty: Option, + pub presence_penalty: Option, + pub stop_sequences: Option>, + // stop_token_ids removed - now uses SpecialTokens for EOS detection + pub mcp_mode: Option, // Tool call mode + pub grammar: Option, // LLG constraint (TopLevelGrammar) + pub thinking: Option, + #[cfg(feature = "python")] + pub grammar_json: Option, // Grammar as JSON string for Python API +} +``` + +**Note**: The `stop_token_ids` field has been removed. Stop sequences are now resolved using the engine's `SpecialTokens` instance for consistent EOS detection across the system. + +#### `EngineConfig` Struct +**Location**: [`src/utils/config.rs:244-289`](src/utils/config.rs:244-289) + +```rust +pub struct EngineConfig { + pub model_id: Option, + pub weight_path: Option, + pub weight_file: Option, + pub enforce_parser: Option, + // ... other fields ... 
+ pub allow_constraint_api: bool, // Allow client constraints via API + pub enable_tool_grammar: bool, // Auto-generate tool grammars from resolved tools + // ... other fields ... +} +``` + +#### Constraint Building Functions + +The server layer provides several functions to convert client requests to `TopLevelGrammar`: + +| Function | Location | Description | +|----------|----------|-------------| +| `grammar_fragment_from_structured_outputs()` | [`src/server/mod.rs:167`](src/server/mod.rs:167) | Convert StructuredOutputs to TopLevelGrammar | +| `grammar_fragment_from_response_format()` | [`src/server/mod.rs:248`](src/server/mod.rs:248) | Convert ResponseFormat to TopLevelGrammar | + +### Grammar Composition + +The `compose_grammars()` function ([`src/utils/guidance.rs:581`](src/utils/guidance.rs:581)) handles grammar composition: + +```rust +pub fn compose_grammars( + constraint_grammars: Vec, // Client-provided constraints + tool_grammar: Option, // Tool call grammar (if enabled) + has_tools: bool, // Whether tools are present + tool_choice_required: bool, // Whether tool_choice is "required" + forced_tool_name: Option, // Specific tool forced via tool_choice + max_tokens: Option, + special_tokens: &SpecialTokens, // ← EOS token IDs for TEXT patterns +) -> TopLevelGrammar +``` + +This function handles 8 different scenarios: +1. No constraint, no tools → text with EOS bounding +2. No constraint, tools optional → tool_call | text with EOS +3. No constraint, tools required → tool_call only +4. No constraint, tools optional, specific tool forced → tool_call only +5. Constraint only, no tools → constraint only +6. Constraint only, tools optional → constraint | tool_call +7. Constraint only, tools required → constraint | tool_call +8. 
Constraint only, specific tool forced → constraint | tool_call + +**Note**: The `special_tokens: &SpecialTokens` parameter provides EOS token IDs for proper freeform text termination via [`chat_text_expression_with_eos()`](src/utils/guidance.rs:485). + +--- + +## 2. DATA FLOW + +### Full Request Flow + +``` +User Request + │ + ▼ +[server/server.rs:250] chat_completion() + │ + ├─ Parse request fields (messages, tools, tool_choice, etc.) + ├─ Resolve tools (MCP + request tools) + │ + ├─ Build constraint grammars from structured_outputs/response_format + │ ├─ grammar_fragment_from_structured_outputs() [server/mod.rs:167] + │ │ └─ Handles choice, regex, json, grammar, structural_tag + │ └─ grammar_fragment_from_response_format() [server/mod.rs:248] + │ └─ Handles response_format with json_schema type + │ + ├─ Build tool grammar if enable_tool_grammar=true + │ [tools/schema.rs:87] build_json_tool_lark_grammar() + │ └─ Creates Lark grammar with tool schemas as %json directives + │ + ├─ Initialize SpecialTokens for EOS detection + │ [utils/special_tokens.rs:133] SpecialTokens::new(&tokenizer) + │ └─ Extracts EOS, BOS, PAD, TOOL, FUNCTION token IDs from tokenizer + │ + ├─ Compose grammars via compose_grammars() [utils/guidance.rs:581] + │ └─ Passes special_tokens: &SpecialTokens for EOS handling + │ + ▼ +[core/engine.rs:1298] generate_stream() + │ + ├─ Apply chat template to messages + ├─ Create Sequence with grammar in SamplingParams + ├─ Allocate KV cache blocks + └─ Initialize GuidanceState if grammar exists + [utils/guidance.rs:526] GuidanceState::new_from_grammar() + ├─ [llguidance] ParserFactory::create_parser(grammar)? 
+ ├─ [llguidance] Matcher::new(Ok(parser)) + │ └─ TokenParser created with Grammar + │ └─ EarleyParser with LexerSpec + │ └─ RegexVec for token matching + └─ GuidanceState initialized with matcher + │ + ▼ +[core/scheduler.rs:616] postprocess() + │ + ├─ For each generated token: + ├─ [core/runner.rs:1597] validate_sequence_for_grammar() + │ └─ [utils/guidance.rs:672] GuidanceState::validate_tokens(output_ids) + │ └─ matcher.validate_tokens() → Option + │ + └─ If validation fails (< output_ids.len()): + └─ [scheduler.rs:227] rollback_sequence() + ├─ Save rollback snapshot + ├─ Truncate token_ids, block_table + ├─ [core/block_manager.rs:946] rollback_to_seq_tokens() + │ └─ Release blocks, clean prefix cache + └─ [core/runner.rs:1607] rollback_sequence_for_guidance() + └─ [utils/guidance.rs:645] GuidanceState::rollback_to() + └─ matcher.rollback(tokens_to_rollback)? + │ + ▼ +[core/runner.rs:1100] sample() + │ + ├─ [utils/guidance.rs:543] GuidanceState::compute_mask() + │ └─ matcher.compute_mask() → SimpleVob + │ + ├─ Apply mask to logits (set invalid to -inf) + │ + └─ [utils/guidance.rs:617] GuidanceState::consume_ff_tokens() + ├─ matcher.compute_ff_tokens() → Vec + └─ For each token: consume + update state + │ + ▼ +Response to Client +``` + +### Streaming Tool Call Flow + +``` +[server/parser.rs:488] StreamToolParser::process_token() + │ + ├─ ParserState::Normal + │ ├─ Check for start tag (, [TOOL_CALLS], etc.) 
+ │ └─ [server/parser.rs:527] is_start_token() + │ └─ Token ID match OR text match + │ + ├─ ParserState::Buffering + │ ├─ Accumulate tokens in buffer + │ ├─ [tool_parser crate] parse_incremental() + │ ├─ Check for end tag + │ └─ [server/parser.rs:587] is_end_token() + │ + └─ ParserState::ToolCalls + ├─ [server/parser.rs:766] build_tool_calls_with_fallback() + ├─ [server/parser.rs:946] parse_complete_with_fallback() + │ ├─ QwenCoder XML parsing + │ ├─ JSON array parsing + │ └─ JSON object parsing + └─ [tools/helpers.rs:102] filter_tool_calls() +``` + +--- + +## 3. GRAMMAR CONSTRUCTION + +### Overview + +The grammar construction system in vllm.rs uses llguidance's `TopLevelGrammar` to represent constraints. When multiple grammars need to be combined (e.g., `constraint | tool_call`), the `compose_grammars()` function handles the composition logic. + +### Grammar Composition Logic + +The `compose_grammars()` function ([`src/utils/guidance.rs:363`](src/utils/guidance.rs:363)) handles 8 different scenarios based on: +- Whether constraint grammars are present +- Whether tool grammars are available +- Whether tool_choice is "required" +- Whether a specific tool is forced + +| Constraint | Tools | tool_choice | Result | +|------------|-------|-------------|---------| +| None | None | - | TEXT only | +| None | Yes | Optional | TEXT \| tool_call | +| None | Yes | Required | tool_call only | +| Yes | None | - | constraint only | +| Yes | Yes | Optional | constraint \| tool_call | +| Yes | Yes | Required | constraint \| tool_call | + +### Lark Grammar Rule Order + +**Critical Requirement**: In Lark grammars, rules must be ordered such that: +1. The `start:` rule is defined FIRST (it's the entry point) +2. Rules that are referenced by other rules must be defined BEFORE those rules +3. 
Helper rules like `ws:` (whitespace) are defined LAST + +**Incorrect order** (causes parsing errors): +```lark +start: TEXT | tool_call +tool_call: "<‌tool_call>" ws json_array ws "<‌/tool_call>" +json_array: "[" obj ("," obj)* "]" +obj: obj_search | obj_weather +ws: /[ \t\r\n]+/ ← Helper rule defined BEFORE referenced rules +``` + +**Correct order** (dependencies first, helpers last): +```lark +start: TEXT | tool_call +tool_call: "<‌tool_call>" ws json_array ws "<‌/tool_call>" +json_array: "[" obj ("," obj)* "]" +obj: obj_search | obj_weather +obj_search: %json {...} +obj_weather: %json {...} +ws: /[ \t\r\n]+/ ← Helper rule defined LAST +``` + +### Grammar Composition + +```mermaid +sequenceDiagram + participant User + participant Server + participant ConstraintBuilder + participant ToolGrammarBuilder + participant ComposeLogic + participant LarkParser + participant EarleyCompiler + + User->>Server: Request with tools + structured_outputs + Server->>ConstraintBuilder: grammar_fragment_from_structured_outputs() + ConstraintBuilder->>LarkParser: Parse JSON schema + + LarkBuilder->>ConstraintBuilder: Return TopLevelGrammar + Server->>ToolGrammarBuilder: build_json_tool_lark_grammar() if enabled + + ToolGrammarBuilder->>LarkParser: Build tool call grammar + LarkParser->>ToolGrammarBuilder: Return tool TopLevelGrammar + + Note over ComposeLogic: compose_grammars() + + ComposeLogic->>ComposeLogic: Determine match arm based on: + ComposeLogic->>ComposeLogic: - constraint_grammars length + ComposeLogic->>ComposeLogic: - tool_grammar presence + ComposeLogic->>ComposeLogic: - tool_choice_required + ComposeLogic->>ComposeLogic: - forced_tool_name presence + + ComposeLogic->>ComposeLogic: If multiple grammars: merge_top_level_grammars() + ComposeGrammar->>EarleyCompiler: Compile direct alternation + EarleyCompiler->>LexerBuilder: Build lexer spec + + LexerBuilder->>ComposeLogic: Return single TopLevelGrammar + + Note over ComposeLogic: Generated grammar: + start: TEXT | 
tool_call + TEXT: /[\x20-\x7E\x80-\uFFFF\n\r\t]/ + tool_call: "<‌tool_call>" ws json_array ws "<‌/tool_call>" + json_array: "[" obj ("," obj)* "]" + obj_search: %json {...} + obj_weather: %json {...} + obj: obj_search | obj_weather + ws: /[ \t\r\n]+/ +``` + + + +### Helper Functions + +#### `merge_top_level_grammars()` + +**Location**: [`src/utils/guidance.rs:161-206`](src/utils/guidance.rs:161-206) + +This function combines multiple `TopLevelGrammar` objects into a single grammar with direct alternation at the start rule. + +```rust +pub fn merge_top_level_grammars( + grammars: Vec, + max_tokens: Option, + start_separator: Option, +) -> TopLevelGrammar { + // Extract all Lark grammar strings + let mut lark_parts = Vec::new(); + + for (_i, g) in grammars.iter().enumerate() { + for gw in &g.grammars { + if let Some(lark) = &gw.lark_grammar { + lark_parts.push(lark.clone()); + } + } + } + + if lark_parts.is_empty() { + let lark_start_exp = format!("start: TEXT\n{}", chat_text_expression()); + let mut tlg = TopLevelGrammar::from_lark(lark_start_exp); + tlg.max_tokens = max_tokens; + return tlg; + } + + // Parse each grammar and extract start RHS + other rules + let mut combined_start_rhs = Vec::new(); + let mut all_other_rules = Vec::new(); + + for lark in lark_parts.iter() { + let (start_rhs, other_rules) = parse_lark_grammar(lark); + combined_start_rhs.push(start_rhs); + all_other_rules.extend(other_rules); + } + + // Combine all other rules, handling duplicates + let combined_rules = combine_rules(all_other_rules); + + // Build new grammar with direct alternation at start + let start_separator = format!(" {} ", &start_separator.unwrap_or_else(|| "|".to_string())); + let start_alternation = combined_start_rhs.join(&start_separator); + let final_grammar = format!("start: {}\n{}", start_alternation, combined_rules); + + let mut top_gram = TopLevelGrammar::from_lark(final_grammar); + top_gram.max_tokens = max_tokens; + top_gram +} +``` + +#### `parse_lark_grammar()` + +**Location**: 
[`src/utils/guidance.rs:85-114`](src/utils/guidance.rs:85-114) + +Extracts the start rule RHS and other rules from a Lark grammar string. + +```rust +fn parse_lark_grammar(lark: &str) -> (String, Vec) { + let lines: Vec<&str> = lark.lines().collect(); + if lines.is_empty() { + return (String::new(), Vec::new()); + } + + let first_line = lines[0].trim(); + if first_line.starts_with("start:") { + // Extract only the rule names after "start:", not the full rule definition + let rhs_part = first_line.strip_prefix("start:").unwrap_or("").trim(); + + // Parse the RHS to get individual rule names (separated by |) + let rule_names: Vec = rhs_part + .split('|') + .map(|s| s.trim().to_string()) + .collect(); + let start_rhs = rule_names.join(" | "); + + // Return all remaining lines as other rules + let other_rules: Vec = lines[1..].iter().map(|s| s.to_string()).collect(); + (start_rhs, other_rules) + } else { + // No start rule - treat entire grammar as the start rule + (lark.to_string(), Vec::new()) + } +} +``` + +#### `combine_rules()` + +**Location**: [`src/utils/guidance.rs:117-156`](src/utils/guidance.rs:117-156) + +Merges grammar rules, handling duplicate rule names by combining them with alternation. 
+ +```rust +fn combine_rules(rules: Vec) -> String { + if rules.is_empty() { + return String::new(); + } + + use std::collections::HashMap; + let mut rule_groups: HashMap> = HashMap::new(); + + for rule in rules { + let rule = rule.trim(); + if rule.is_empty() { + continue; + } + + // Find the rule name (before the first ":") + if let Some(colon_pos) = rule.find(':') { + let name = rule[..colon_pos].trim().to_string(); + let body = rule[colon_pos + 1..].trim().to_string(); + rule_groups.entry(name).or_default().push(body); + } else { + // Rule without colon - add as-is + rule_groups.entry("anonymous".to_string()).or_default().push(rule.to_string()); + } + } + + // Reconstruct rules, merging duplicates + let mut combined = Vec::new(); + for (name, bodies) in rule_groups { + if bodies.len() == 1 { + combined.push(format!("{}: {}", name, bodies[0])); + } else { + // Multiple definitions for same rule - combine with alternation + combined.push(format!("{}: {}", name, bodies.join(" | "))); + } + } + + combined.join("\n") +} +``` + +### Tool Grammar Construction + +**Location**: [`src/tools/schema.rs:87-145`](src/tools/schema.rs:87-145) + +The `build_json_tool_lark_grammar()` function creates properly ordered tool grammars: + +```rust +pub fn build_json_tool_lark_grammar( + tools: &[Tool], + start: &str, + end: &str, + start_is_special: bool, + end_is_special: bool, +) -> llguidance::api::TopLevelGrammar { + let lark = build_json_tool_lark_string(tools, start, end, start_is_special, end_is_special); + top_level_grammar_from_lark(&lark) +} + +fn build_json_tool_lark_string( + tools: &[Tool], + start: &str, + end: &str, + start_is_special: bool, + end_is_special: bool, +) -> String { + let mut obj_rules = Vec::new(); + for tool in tools { + let tool_name = tool.function.name.replace("-", "_"); + let schema_str = serde_json::to_string(&tool.function.parameters).unwrap_or_default(); + obj_rules.push(format!("obj_{}: %json {}", tool_name, schema_str)); + } + let start_tag = 
if start.is_empty() { + "<‌tool_call>".to_string() + } else { + lark_literal(start, start_is_special) + }; + let end_tag = if end.is_empty() { + "<‌/tool_call>".to_string() + } else { + lark_literal(end, end_is_special) + }; + let ws = lark_ws_regex(); + + // Build the complete grammar with rules in correct dependency order + let mut all_rules = Vec::new(); + all_rules.push("start: tool_call".to_string()); + all_rules.push(format!("tool_call: {} ws json_array ws {}", start_tag, end_tag)); + all_rules.push("json_array: \"[\" obj (\",\" obj)* \"]\"".to_string()); + + if obj_rules.is_empty() { + // No tools - use a generic object schema + all_rules.push("obj: %json {\"type\": \"object\"}".to_string()); + } else { + // Individual obj_* rules must come BEFORE the obj: rule that references them + all_rules.extend(obj_rules.clone()); + // obj: rule references all obj_* rules via alternation + all_rules.push(format!("obj: {}", obj_rules.iter().map(|r| { + r.trim().split(':').next().unwrap_or("obj").to_string() + }).collect::>().join(" | "))); + } + // ws comes LAST - it's a helper rule for whitespace + all_rules.push(format!("ws: {}", ws)); + + all_rules.join("\n") + "\n" +} +``` + +**Generated grammar order**: +```lark +start: tool_call +tool_call: <‌tool_call> ws json_array ws <‌/tool_call> +json_array: "[" obj ("," obj)* "]" +obj_search: %json {"type":"object","properties":{...}} +obj_weather: %json {"type":"object","properties":{...}} +obj: obj_search | obj_weather +ws: /[ \t\r\n]+/ +``` + +**Note**: The tool call tags (`<‌tool_call>`, `<‌/tool_call>`) are currently specified as **string literals** in the Lark grammar, not as token IDs. This requires the tokenizer to recognize these exact byte sequences as single tokens. + +For token ID support, the `lark_special_token()` function exists ([`src/tools/schema.rs:60-67`](src/tools/schema.rs:60-67)) but is not currently used in tool grammar construction. 
To use token IDs instead of strings, the grammar would need to be generated with `<[151657]>` syntax where 151657 is the token ID for `<‌tool_call>`. + +**Token ID-based alternative** (not currently implemented): +```lark +start: tool_call +tool_call: <[151657]> ws json_array ws <[151658]> +json_array: "[" obj ("," obj)* "]" +... +``` + +--- + +## 4. API REFERENCE + +### Grammar Fragment Building Functions + +These functions convert client request fields to `TopLevelGrammar` objects. + +#### `grammar_fragment_from_structured_outputs()` +**Location**: [`src/server/mod.rs:167`](src/server/mod.rs:167) + +Converts `StructuredOutputs` to `TopLevelGrammar`: + +```rust +pub fn grammar_fragment_from_structured_outputs( + structured: &StructuredOutputs +) -> Result> +``` + +**Parameters**: +- `structured.choice`: Vec → Lark grammar for enum +- `structured.regex`: String → Regex constraint +- `structured.json`: Value → JSON Schema constraint +- `structured.grammar`: String → Lark grammar +- `structured.structural_tag`: Value → QwenCoder-style XML envelope + +**Returns**: `Some(TopLevelGrammar)` or `None` if invalid + +**Logging**: +- `DEBUG`: Building constraint grammar +- `INFO`: Completed with grammar type + +#### `grammar_fragment_from_response_format()` +**Location**: [`src/server/mod.rs:248`](src/server/mod.rs:248) + +Converts `ResponseFormat` to `TopLevelGrammar`: + +```rust +pub fn grammar_fragment_from_response_format( + response_format: &ResponseFormat +) -> Result> +``` + +**Parameters**: +- `response_format.format_type`: Must be "json_schema" +- `response_format.json_schema.schema`: JSON Schema value + +**Returns**: `Some(TopLevelGrammar)` or error + +**Logging**: +- `DEBUG`: Building JSON schema grammar +- `INFO`: Completed with grammar + +### GuidanceState Methods + +#### `new_from_grammar()` +**Location**: [`src/utils/guidance.rs:526`](src/utils/guidance.rs:526) + +Creates a new GuidanceState from a TopLevelGrammar: + +```rust +pub fn new_from_grammar(factory: 
Arc, grammar: &TopLevelGrammar) -> Result +``` + +**Parameters**: +- `factory`: Arc - llguidance parser factory +- `grammar`: &TopLevelGrammar - The grammar to parse + +**Returns**: `Result` + +**Flow**: +1. `factory.create_parser(grammar)?` → Parser +2. `Matcher::new(Ok(parser))` → Matcher +3. Initialize `llm_tokens`, `llm_bytes`, `slicer_cache` + +**Logging**: +- `DEBUG`: Constraint type +- `DEBUG`: Grammar converted +- `INFO`: GuidanceState created successfully + +#### `compute_mask()` +**Location**: [`src/utils/guidance.rs:543`](src/utils/guidance.rs:543) + +Computes valid token mask: + +```rust +pub fn compute_mask(&mut self) -> Result> +``` + +**Returns**: `Option` with valid token indices, or None if matcher stopped + +**Logging**: +- `TRACE`: Mask computed with N valid tokens + +#### `validate_tokens()` +**Location**: [`src/utils/guidance.rs:672`](src/utils/guidance.rs:672) + +Validates a sequence of tokens: + +```rust +pub fn validate_tokens(&mut self, tokens: &[u32]) -> Option +``` + +**Parameters**: +- `tokens`: &[u32] - Tokens to validate + +**Returns**: `Some(valid_token_count)` or `None` if validation failed + +**Logging**: +- `DEBUG`: Token X rejected by grammar (if invalid) + +#### `commit_token()` +**Location**: [`src/utils/guidance.rs:556`](src/utils/guidance.rs:556) + +Commits a token to the grammar state: + +```rust +pub fn commit_token(&mut self, token: u32) -> Result<()> +``` + +**Parameters**: +- `token`: u32 - Token ID to commit + +**Flow**: +1. `matcher.consume_token(token)?` +2. `llm_tokens.push(token)` +3. `llm_bytes += 4` (approximate bytes per token) + +**Logging**: +- `TRACE`: Token consumed successfully + +#### `consume_ff_tokens()` +**Location**: [`src/utils/guidance.rs:617`](src/utils/guidance.rs:617) + +Consumes fast-forward tokens guaranteed by grammar: + +```rust +pub fn consume_ff_tokens(&mut self) -> Result, anyhow::Error> +``` + +**Returns**: Vec of consumed FF tokens + +**Flow**: +1. `matcher.compute_ff_tokens()` → Vec +2. 
For each token: `consume_token()` + `llm_tokens.push()` + `llm_bytes += 4` + +**Logging**: +- `DEBUG`: consume_ff_tokens() called +- `DEBUG`: compute_ff_tokens() returned N tokens +- `DEBUG`: Successfully consumed N tokens + +#### `rollback_to()` +**Location**: [`src/utils/guidance.rs:645`](src/utils/guidance.rs:645) + +Rolls back to a previous state: + +```rust +pub fn rollback_to(&mut self, token_pos: usize, byte_pos: usize) -> Result<()> +``` + +**Parameters**: +- `token_pos`: usize - Target token position +- `byte_pos`: usize - Target byte position + +**Flow**: +1. Calculate `tokens_to_rollback = llm_tokens.len() - token_pos` +2. `matcher.rollback(tokens_to_rollback)?` +3. `llm_tokens.truncate(token_pos)` +4. `llm_bytes = byte_pos` + +**Logging**: +- `DEBUG`: Rollback N tokens successful + +#### `num_tokens()` +**Location**: [`src/utils/guidance.rs:571`](src/utils/guidance.rs:571) + +Returns the number of committed tokens: + +```rust +pub fn num_tokens(&self) -> usize +``` + +#### `num_bytes()` +**Location**: [`src/utils/guidance.rs:576`](src/utils/guidance.rs:576) + +Returns the number of committed bytes: + +```rust +pub fn num_bytes(&self) -> usize +``` + +#### `is_finished()` +**Location**: [`src/utils/guidance.rs:581`](src/utils/guidance.rs:581) + +Checks if guidance is finished: + +```rust +pub fn is_finished(&self) -> bool +``` + +#### `last_token()` +**Location**: [`src/utils/guidance.rs:586`](src/utils/guidance.rs:586) + +Gets the last committed token: + +```rust +pub fn last_token(&self) -> Option +``` + +#### `validate_token()` +**Location**: [`src/utils/guidance.rs:591`](src/utils/guidance.rs:591) + +Validates a single token without consuming: + +```rust +pub fn validate_token(&mut self, token: u32) -> bool +``` + +**Returns**: `true` if valid, `false` if rejected + +**Logging**: +- `DEBUG`: Token rejected by grammar (if invalid) + +#### `compute_mask_or_eos()` +**Location**: [`src/utils/guidance.rs:604`](src/utils/guidance.rs:604) + +Computes valid 
token mask or EOS set: + +```rust +pub fn compute_mask_or_eos(&mut self) -> Result +``` + +**Returns**: `SimpleVob` with valid token indices + +#### `compute_ff_tokens()` +**Location**: [`src/utils/guidance.rs:609`](src/utils/guidance.rs:609) + +Computes fast-forward tokens without consuming: + +```rust +pub fn compute_ff_tokens(&mut self) -> Vec +``` + +**Returns**: Vec of FF tokens + +#### `has_pending_lexeme_bytes()` +**Location**: [`src/utils/guidance.rs:640`](src/utils/guidance.rs:640) + +Checks if there are pending lexeme bytes: + +```rust +pub fn has_pending_lexeme_bytes(&self) -> bool +``` + +#### `capture_snapshot()` +**Location**: [`src/utils/guidance.rs:656`](src/utils/guidance.rs:656) + +Captures current state as rollback snapshot (no-op in current implementation): + +```rust +pub fn capture_snapshot(&mut self) +``` + +#### `clear()` +**Location**: [`src/utils/guidance.rs:660`](src/utils/guidance.rs:660) + +Clears all state: + +```rust +pub fn clear(&mut self) +``` + +### Helper Functions + +#### `compose_grammars()` +**Location**: [`src/utils/guidance.rs:363`](src/utils/guidance.rs:363) + +Composes multiple grammars into a single TopLevelGrammar: + +```rust +pub fn compose_grammars( + constraint_grammars: Vec, + tool_grammar: Option, + has_tools: bool, + tool_choice_required: bool, + forced_tool_name: Option, + max_tokens: Option, +) -> TopLevelGrammar +``` + +See Section 3 for full documentation. 
+ +#### `chat_text_expression_with_eos()` + +**Location**: [`src/utils/guidance.rs:485-514`](src/utils/guidance.rs:485-514) + +Returns the TEXT pattern with explicit EOS token IDs for free-form text matching with proper termination: + +```rust +pub fn chat_text_expression_with_eos(special_tokens: &SpecialTokens) -> String { + let eos_token_ids = special_tokens.eos_ids(); + + // First check environment variable override + if let Ok(val) = std::env::var("VLLM_LLG_DEFAULT_TEXT") { + return format!("{}", val); + } + + // Build EOS alternation pattern using <[id]> syntax for token IDs + if eos_token_ids.is_empty() { + // Fallback to stop="" when no EOS tokens available + r#"start: text +text[stop=""]: /((?s).*?)/"#.to_string() + } else if eos_token_ids.len() == 1 { + format!(r#"start: text_with_eos +text_with_eos: TEXT eos? +TEXT: /(?s:.*)/ +eos: <[{}]>"#, eos_token_ids[0]) + } else { + let ids: Vec = eos_token_ids.iter().map(|id| format!("<[{}]>", id)).collect(); + let eos_alternation = ids.join(" | "); + format!(r#"start: text_with_eos +text_with_eos: TEXT eos? +TEXT: /(?s:.*)/ +eos: {}"#, eos_alternation) + } +} +``` + +This function: +1. Extracts EOS token IDs from `SpecialTokens` +2. Builds a TEXT pattern with optional EOS termination (`eos?`) +3. Uses `<[token_id]>` syntax for token ID references in the Lark grammar +4. 
Falls back to `stop=""` pattern when no EOS tokens are available + +#### `merge_top_level_grammars()` +**Location**: [`src/utils/guidance.rs:161`](src/utils/guidance.rs:161) + +Merges multiple TopLevelGrammar objects with direct alternation: + +```rust +pub fn merge_top_level_grammars( + grammars: Vec, + max_tokens: Option, + start_separator: Option, +) -> TopLevelGrammar +``` + +#### `build_tool_call_lark()` +**Location**: [`src/utils/guidance.rs:483`](src/utils/guidance.rs:483) + +Builds Lark grammar string for tool calls: + +```rust +pub fn build_tool_call_lark( + tools: &[Tool], + schema_map: &Arc>, + start: &str, + end: &str, +) -> String +``` + +#### `lark_ws_regex()` +**Location**: [`src/utils/guidance.rs:478`](src/utils/guidance.rs:478) + +Returns the whitespace regex pattern for Lark grammars: + +```rust +pub fn lark_ws_regex() -> &'static str +``` + +#### `chat_text_expression()` +**Location**: [`src/utils/guidance.rs:306`](src/utils/guidance.rs:306) + +Returns the TEXT pattern for free-form text matching: + +```rust +pub fn chat_text_expression() -> String +``` + +#### `sanitize_to_ascii()` +**Location**: [`src/utils/guidance.rs:16`](src/utils/guidance.rs:16) + +Sanitizes a string by removing non-ASCII bytes: + +```rust +pub fn sanitize_to_ascii(s: &str) -> String +``` + +#### `sanitize_utf8_valid()` +**Location**: [`src/utils/guidance.rs:24`](src/utils/guidance.rs:24) + +Sanitizes a string by removing invalid UTF-8 sequences: + +```rust +pub fn sanitize_utf8_valid(s: &str) -> String +``` + +#### `top_level_grammar_from_regex()` +**Location**: [`src/utils/guidance.rs:36`](src/utils/guidance.rs:36) + +Creates TopLevelGrammar from regex: + +```rust +pub fn top_level_grammar_from_regex(regex: &str) -> TopLevelGrammar +``` + +#### `top_level_grammar_from_lark()` +**Location**: [`src/utils/guidance.rs:42`](src/utils/guidance.rs:42) + +Creates TopLevelGrammar from Lark string: + +```rust +pub fn top_level_grammar_from_lark(lark: &str) -> TopLevelGrammar +``` + 
+#### `top_level_grammar_from_json_schema()` +**Location**: [`src/utils/guidance.rs:48`](src/utils/guidance.rs:48) + +Creates TopLevelGrammar from JSON schema: + +```rust +pub fn top_level_grammar_from_json_schema(schema: serde_json::Value) -> Result +``` + +#### `get_lark_from_top_level_grammar()` +**Location**: [`src/utils/guidance.rs:209`](src/utils/guidance.rs:209) + +Extracts the Lark grammar string from TopLevelGrammar: + +```rust +pub fn get_lark_from_top_level_grammar(gram: &TopLevelGrammar) -> String +``` + +#### `build_grammar_vec()` +**Location**: [`src/utils/guidance.rs:316`](src/utils/guidance.rs:316) + +Builds grammar vec based on constraint and tool presence: + +```rust +pub fn build_grammar_vec( + constraint_grammars: Vec, + tool_grammar: Option, + tool_choice_required: bool, +) -> Vec +``` + +### BuildLLG Factory Functions + +#### `build_llg_factory()` +**Location**: [`src/utils/guidance.rs:449`](src/utils/guidance.rs:449) + +Builds a ParserFactory for llguidance: + +```rust +pub fn build_llg_factory( + tokenizer: Tokenizer, + vocab_size: Option, +) -> Result> +``` + +#### `load_toktrie_from_path()` +**Location**: [`src/utils/guidance.rs:471`](src/utils/guidance.rs:471) + +Loads a TokTrie from a file path: + +```rust +pub fn load_toktrie_from_path(path: impl AsRef) -> Result +``` + +### GuidanceState Methods + +#### `new_from_grammar()` +**Location**: [`src/utils/guidance.rs:425-439`](src/utils/guidance.rs:425-439) + +Creates a new GuidanceState from a constraint: + +```rust +pub fn new_from_grammar(factory: Arc, grammar: &TopLevelGrammar) -> Result +``` + +**Flow**: +1. `factory.create_parser(grammar)?` → Parser +2. `Matcher::new(Ok(parser))` → Matcher +3. 
Initialize `llm_tokens`, `llm_bytes`, `slicer_cache` + +**Logging**: +- `DEBUG`: Constraint type +- `DEBUG`: Grammar converted +- `INFO`: GuidanceState created successfully + +#### `validate_token()` +**Location**: [`src/utils/guidance.rs:490-500`](src/utils/guidance.rs:490-500) + +Validates a single token without consuming: + +```rust +pub fn validate_token(&mut self, token: u32) -> bool +``` + +**Returns**: `true` if valid, `false` if rejected + +**Logging**: +- `DEBUG`: Token rejected by grammar (if invalid) + +#### `commit_token()` +**Location**: [`src/utils/guidance.rs:455-467`](src/utils/guidance.rs:455-467) + +Commits a token to the grammar state: + +```rust +pub fn commit_token(&mut self, token: u32) -> Result<()> +``` + +**Flow**: +1. `matcher.consume_token(token)?` +2. `llm_tokens.push(token)` +3. `llm_bytes += 4` (approximate bytes per token) + +**Logging**: +- `TRACE`: Token consumed successfully + +#### `compute_mask_or_eos()` +**Location**: [`src/utils/guidance.rs:503-505`](src/utils/guidance.rs:503-505) + +Computes valid token mask or EOS set: + +```rust +pub fn compute_mask_or_eos(&mut self) -> Result +``` + +**Returns**: `SimpleVob` with valid token indices + +**Logging**: +- `TRACE`: Mask computed with N valid tokens + +#### `consume_ff_tokens()` +**Location**: [`src/utils/guidance.rs:516-536`](src/utils/guidance.rs:516-536) + +Consumes fast-forward tokens guaranteed by grammar: + +```rust +pub fn consume_ff_tokens(&mut self) -> Result, anyhow::Error> +``` + +**Flow**: +1. `matcher.compute_ff_tokens()` → Vec +2. 
For each token: `consume_token()` + `llm_tokens.push()` + `llm_bytes += 4` + +**Returns**: Vec of consumed FF tokens + +**Logging**: +- `DEBUG`: consume_ff_tokens() called +- `DEBUG`: compute_ff_tokens() returned N tokens +- `DEBUG`: Successfully consumed N tokens + +#### `rollback_to()` +**Location**: [`src/utils/guidance.rs:544-552`](src/utils/guidance.rs:544-552) + +Rolls back to a previous state: + +```rust +pub fn rollback_to(&mut self, token_pos: usize, byte_pos: usize) -> Result<()> +``` + +**Flow**: +1. Calculate `tokens_to_rollback = llm_tokens.len() - token_pos` +2. `matcher.rollback(tokens_to_rollback)?` +3. `llm_tokens.truncate(token_pos)` +4. `llm_bytes = byte_pos` + +**Logging**: +- `DEBUG`: Rollback N tokens successful + +### ModelRunner Methods + +#### `validate_sequence_for_grammar()` +**Location**: [`src/core/runner.rs:1597-1604`](src/core/runner.rs:1597-1604) + +Validates entire sequence against grammar: + +```rust +pub fn validate_sequence_for_grammar( + &self, + seq_id: usize, + output_ids: &[u32] +) -> Option +``` + +**Returns**: `Some(valid_token_count)` or `None` if no constraint + +**Flow**: +1. Get GuidanceState for seq_id +2. Call `state.validate_tokens(output_ids)` +3. Map Result → Option + +**Logging**: +- None (internal operation) + +#### `rollback_sequence_for_guidance()` +**Location**: [`src/core/runner.rs:1607-1614`](src/core/runner.rs:1607-1614) + +Rolls back guidance state for a sequence: + +```rust +pub fn rollback_sequence_for_guidance( + &self, + seq_id: usize, + target_tokens: usize +) -> Result<()> +``` + +**Flow**: +1. Get GuidanceState for seq_id +2. Calculate `target_bytes = target_tokens * 4` +3. 
Call `state.rollback_to(target_tokens, target_bytes)` + +**Logging**: +- None (internal operation) + +#### `consume_ff_tokens()` +**Location**: [`src/core/runner.rs:1618-1628`](src/core/runner.rs:1618-1628) + +Consumes FF tokens for a sequence: + +```rust +pub fn consume_ff_tokens(&self, seq_id: usize) -> Result> +``` + +**Returns**: FF tokens consumed + +**Flow**: +1. Get GuidanceState for seq_id +2. Call `state.consume_ff_tokens()` +3. Map errors to candle_core::Error + +**Logging**: +- None (internal operation) + +### BlockManager Methods + +#### `rollback_to_seq_tokens()` +**Location**: [`src/core/block_manager.rs:946-1005`](src/core/block_manager.rs:946-1005) + +Rolls back sequence to token position: + +```rust +pub fn rollback_to_seq_tokens( + &mut self, + seq: &mut Sequence, + target_tokens: usize +) -> Result<()> +``` + +**Flow**: +1. Calculate `target_blocks = target_tokens.div_ceil(self.block_size)` +2. Calculate `blocks_to_release = current_blocks - target_blocks` +3. Release blocks from end +4. Update `seq.num_cached_tokens` +5. Clean up prefix cache entries +6. Invalidate Mamba prefix hashes + +**Logging**: +- None (internal operation) + +--- + +## 5. USAGE EXAMPLES + +### Example 1: Enable Tool Grammar Generation + +**CLI**: +```bash +./vllm-rs --enable-tool-grammar --allow-constraint-api +``` + +**In code**: +```rust +let econfig = EngineConfig::new( + // ... other params ... + allow_constraint_api: false, + enable_tool_grammar: true, // Auto-generate tool grammar +); +``` + +When enabled, the system will: +1. Build Lark grammar from `resolved_tools` via [`build_json_tool_lark_grammar()`](src/tools/schema.rs:87) +2. Embed all tool schemas as `%json` directives +3. 
Make tool calls optional via `start: (TEXT | tool_call)+` (allows mid-conversation tool calls) + +### Example 2: Structured Outputs (OpenAI-style) + +There are two equivalent ways to specify structured outputs: + +**Top-level format** (recommended for convenience): +```json +{ + "messages": [{"role": "user", "content": "Generate a user profile"}], + "structured_outputs": { + "json": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"} + }, + "required": ["name", "age"] + } + } +} +``` + +**OpenAI-compatible format** (via `extra_body`): +```json +{ + "messages": [{"role": "user", "content": "Generate a user profile"}], + "extra_body": { + "structured_outputs": { + "json": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"} + }, + "required": ["name", "age"] + } + } + } +} +``` + +Both formats produce identical results. The top-level format is more convenient for direct API calls, while `extra_body` maintains OpenAI compatibility. + +### Example 3: Response Format (OpenAI-compatible) + +```json +{ + "messages": [{"role": "user", "content": "Provide a mathematical reasoning"}], + "response_format": { + "type": "json_schema", + "json_schema": { + "name": "math_reasoning", + "schema": { + "type": "object", + "properties": { + "steps": {"type": "array", "items": {"type": "string"}}, + "final_answer": {"type": "string"} + }, + "required": ["steps", "final_answer"] + } + } + } +} +``` + +### Example 4: Custom Lark Grammar + +Using the legacy `constraint` field with `constraint_type`: + +```json +{ + "messages": [{"role": "user", "content": "Generate a phone number"}], + "constraint": "start: 'Hello' _WS? 'World' _WS? 
'!'", + "constraint_type": "lark" +} +``` + +Or via structured_outputs: + +```json +{ + "messages": [{"role": "user", "content": "Generate a date"}], + "structured_outputs": { + "grammar": "start: date\\n date: year \"-\" month \"-\" day\\n year: /[0-9]{4}/\\n month: /[0-9]{2}/\\n day: /[0-9]{2}/" + } +} +``` + +### Example 5: Regular Expression Constraint + +Using the legacy `constraint` field: + +```json +{ + "messages": [{"role": "user", "content": "Generate a number"}], + "constraint": "^number\\s\\d{3}-\\d{3}-\\d{4}$", + "constraint_type": "regex" +} +``` + +Or via structured_outputs: + +```json +{ + "messages": [{"role": "user", "content": "Generate a number"}], + "structured_outputs": { + "regex": "^number\\s\\d{3}-\\d{3}-\\d{4}$" + } +} +``` + +### Example 6: Choice/Enum Constraint + +```json +{ + "messages": [{"role": "user", "content": "Classify this sentiment"}], + "structured_outputs": { + "choice": ["positive", "negative", "neutral"] + } +} +``` + +--- + +## 6. MATHEMATICAL FOUNDATIONS + +### Token Validation Probability + +The llguidance matcher computes the probability of each token being valid given the current grammar state: + +``` +P(token | grammar_state) = + 1.0 if token ∈ valid_tokens(grammar_state) + 0.0 otherwise +``` + +### FF Token Computation + +Fast-forward tokens are computed by exploring the grammar automaton: + +``` +FF_tokens = longest_prefix(w) where: + w ∈ Σ* (input alphabet) + ∧ δ(q0, w) ∈ F (final states) + ∧ ∀prefix p of w: δ(q0, p) defined +``` + +### Rollback Cost + +The rollback operation has O(n) complexity where n = tokens_to_rollback: + +``` +rollback_cost = O(n) + O(k) + where n = tokens to rollback + where k = bytes to adjust +``` + +### Mask Computation Complexity + +``` +mask_computation = O(|V| * |grammar_rules|) + where |V| = vocabulary size + where |grammar_rules| = number of grammar rules +``` + +With caching (SlicerCache), repeated queries at the same position are O(1). + +--- + +## 7. 
ROLLBACK MECHANICS + +### State Consistency Guarantees + +Before rollback: +- `Sequence.token_ids`: All tokens including invalid ones +- `Sequence.block_table`: All allocated blocks +- `Sequence.num_cached_tokens`: Full cached count +- `GuidanceState.llm_tokens`: All committed tokens +- `GuidanceState.matcher`: Parser state at invalid position +- `BlockManager.prefix_cache`: All cached entries + +After rollback: +- `Sequence.token_ids`: Truncated to valid position +- `Sequence.block_table`: Truncated to valid blocks +- `Sequence.num_cached_tokens`: Block-aligned value +- `GuidanceState.llm_tokens`: Truncated to valid position +- `GuidanceState.matcher`: Parser state at valid position +- `BlockManager.prefix_cache`: Cleaned for evicted blocks + +### Rollback Steps + +1. **Save Snapshot**: Store current state for potential recovery +2. **Truncate Sequence**: Remove invalid tokens from token_ids +3. **Truncate Blocks**: Remove blocks beyond target position +4. **Release KV Cache**: Decrement block reference counts +5. **Clean Prefix Cache**: Remove entries for released blocks +6. **Invalidate Mamba**: Remove Mamba prefix mappings +7. **Rollback Matcher**: Reset grammar state to valid position +8. **Reset Status**: Mark sequence as Running for reprocessing + +### Error Handling + +If rollback fails: +- Log error with full state dump +- Mark sequence as Finished to release resources +- Do NOT attempt partial rollback + +--- + +## 8. PERFORMANCE CONSIDERATIONS + +### Positive Impacts + +1. **Reduced re-sampling**: FF tokens skip ahead to valid continuations +2. **Smaller logit space**: Mask reduces candidates from vocab_size to valid set +3. **Early rejection**: Validation catches failures before streaming + +### Tradeoffs + +1. **Memory overhead**: GuidanceState stored per-sequence (~100KB) +2. **Parsing overhead**: StreamToolParser tracks incremental state +3. 
**Rollback cost**: O(n) where n = tokens to rollback + +### Recommendations + +- Use `--enable-tool-grammar` for tool-heavy workloads +- Use structured_outputs for complex JSON schemas +- Monitor `guidance_failed` counter for constraint issues + +--- + +## 9. LOGGING LEVELS + +| Level | Use Case | Example | +|-------|----------|---------| +| `TRACE` | Token-level operations | "Token 123 consumed successfully" | +| `DEBUG` | Constraint processing | "Building Lark grammar from choice options" | +| `INFO` | State changes | "GuidanceState created successfully" | +| `WARN` | Validation failures | "Token 456 rejected by grammar" | +| `ERROR` | Rollback failures | "Guidance rollback failed: ..." | + +--- + +## 10. CLI FLAGS REFERENCE + +| Flag | Default | Description | +|------|---------|-------------| +| `--allow-constraint-api` | `false` | Allow client to submit structured_outputs/response_format | +| `--enable-tool-grammar` | `false` | Automatically build LLG grammar from tools | +| `--prefix-cache` | `false` | Enable prefix caching | +| `--fp8-kvcache` | `false` | Use FP8 quantization for KV cache | + +--- + +## 11. 
TROUBLESHOOTING + +### Issue: "Guidance mask length is 0" + +**Cause**: Constraint is too restrictive, no tokens valid + +**Solution**: +- Check constraint grammar/schema +- Enable `allow_constraint_api` for debugging +- Remove or set `grammar: null` for non-constrained generation + +### Issue: "structured_outputs must set exactly one of choice, regex, json, grammar, or structural_tag" + +**Cause**: Zero or multiple constraint fields specified in structured_outputs + +**Solution**: Specify exactly one constraint type in the request + +### Issue: "Unsupported response_format type" + +**Cause**: response_format.type is not "json_schema" + +**Solution**: Use only supported types or use structured_outputs instead + +### Issue: "Tool buffering exceeded timeout" + +**Cause**: Streaming tool call taking too long to complete + +**Solution**: +- Increase `VLLM_RS_TOOL_BUFFER_TIMEOUT_SECS` +- Check for malformed tool call JSON +- Verify tool parser configuration + +--- + +## 12. TESTING & VALIDATION + +### Testing Grammar-Driven Guidance via curl + +#### Example 1: Phone Number Format (Regex Constraint) + +**Enable client constraints**: +```bash +vllm-rs --m unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF --f Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf \ + --ui-server --allow-constraint-api +``` + +**Test request** (legacy `constraint` / `constraint_type` fields): +```bash +curl -sXPOST localhost:8000/v1/chat/completions \ + -H 'Content-Type: application/json' \ + -d '{ + "messages": [{"role":"user","content":"Generate a phone number"}], + "constraint": "^number:\\s\\s\\d{3}-\\d{3}-\\d{4}\\ndo you want a sandwitch with that\\s\\S{6}", + "constraint_type": "regex" + }' | jq -r '.choices[0].message.content' +``` + +**Expected output**: +``` +number: 123-456-7890 +do you want a sandwitch with that number? 
+``` + +--- + +#### Example 2: JSON Schema Constraint (Structured Outputs) + +**Test request**: +```bash +curl -sXPOST localhost:8000/v1/chat/completions \ + -H 'Content-Type: application/json' \ + -d '{ + "messages": [{"role":"user","content":"Generate a user profile"}], + "structured_outputs": { + "json": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer", "minimum": 0, "maximum": 150}, + "email": {"type": "string", "pattern": "^[a-z]+@[a-z]+\\.[a-z]+$"} + }, + "required": ["name", "age", "email"], + "additionalProperties": false + } + }, + "max_tokens": 500 + }' | jq -r '.choices[0].message.content' +``` + +**Expected output**: JSON with `name` (string), `age` (integer), `email` (string matching pattern) + +--- + +#### Example 3: Tool Grammar Generation (Auto-LLG) + +**Enable tool grammar**: +```bash +vllm-rs --m unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF --f Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf \ + --ui-server --enable-tool-grammar --mcp-config ./mcp.json +``` + +**Test request with tools**: +```bash +curl -sXPOST localhost:8000/v1/chat/completions \ + -H 'Content-Type: application/json' \ + -d '{ + "messages": [{"role":"user","content":"What is the weather in London?"}], + "tools": [{ + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": {"type": "string", "description": "City name"} + }, + "required": ["location"] + } + } + }], + "tool_choice": "auto", + "max_tokens": 500 + }' | jq -r '.choices[0].message.content' +``` + +**Expected output**: Tool call in proper format with `name` and `arguments` + +--- + +#### Example 4: Choice/Enum Constraint (Lark Grammar) + +**Test request**: +```bash +curl -sXPOST localhost:8000/v1/chat/completions \ + -H 'Content-Type: application/json' \ + -d '{ + "messages": [{"role":"user","content":"Classify this sentiment"}], + "structured_outputs": { + 
"choice": ["positive", "negative", "neutral"] + }, + "max_tokens": 50 + }' | jq -r '.choices[0].message.content' +``` + +**Expected output**: One of `positive`, `negative`, or `neutral` (quoted string) + +--- + +#### Example 5: Custom Lark Grammar + +**Test request**: +```bash +curl -sXPOST localhost:8000/v1/chat/completions \ + -H 'Content-Type: application/json' \ + -d '{ + "messages": [{"role":"user","content":"Generate a date"}], + "structured_outputs": { + "grammar": "start: date\\n date: year \"-\" month \"-\" day\\n year: /[0-9]{4}/\\n month: /[0-9]{2}/\\n day: /[0-9]{2}/" + }, + "max_tokens": 50 + }' | jq -r '.choices[0].message.content' +``` + +**Expected output**: Date in `YYYY-MM-DD` format + +--- + +### Verification Checklist + +For each test, verify: +1. [ ] Response contains only tokens valid per the grammar/constraint +2. [ ] No invalid JSON structure produced +3. [ ] Tool calls follow proper `name`/`arguments` format +4. [ ] Regex patterns matched exactly +5. [ ] Enum choices limited to specified options + +--- + +### Log Messages to Watch For + +| Message | Meaning | +|---------|---------| +| `[llg] Applied constraint to params` | Constraint successfully set from tools | +| `[llg] GuidanceState created successfully` | Grammar parser initialized | +| `[llg] Token X rejected by grammar` | Token validation failed | +| `[llg] Resampled token X consumed by matcher` | Re-sampling worked correctly | +| `[Seq X] Exceeded 3 rollback attempts` | Rolling back too often - check constraint | + +--- + +## 14. TOKEN ID BASED LARK GRAMMAR CALL GRAPH + +### Overview + +When `start_token_ids` and `end_token_ids` are provided to `build_json_tool_lark_grammar()` or `build_xml_tool_lark_grammar()`, the system uses token ID syntax (`<[token_id]>`) instead of string literals in the Lark grammar. 
+ +### Call Graph + +``` +Server Request + │ + ▼ +[server/server.rs:458-466] build_json_tool_lark_grammar() + │ + ├─ tool_config.start_token_ids (e.g., {151657}) + ├─ tool_config.end_token_ids (e.g., {151658}) + │ + ▼ +[tools/schema.rs:87-100] build_json_tool_lark_grammar() + │ + ├─ Accepts start_token_ids: Option<&HashSet<u32>> + ├─ Accepts end_token_ids: Option<&HashSet<u32>> + │ + ▼ +[tools/schema.rs:118-143] build_json_tool_lark_string() + │ + ├─ if start_token_ids.is_some_and(|ids| !ids.is_empty()): + │ └─ [tools/schema.rs:60-67] lark_special_token(ids) + │ └─ Returns: "<[151657]>" (token ID syntax) + │ + ├─ else: + │ └─ Uses lark_literal(start, start_is_special) + │ └─ Returns: "\"<tool_call>\"" (string literal syntax) + │ + ▼ +[Lark Grammar String] + │ + ├─ Token ID mode: "tool_call: <[151657]> ws json_array ws <[151658]>" + └─ String mode: "tool_call: \"<tool_call>\" ws json_array ws \"</tool_call>\"" +``` + +### lark_special_token() Function + +**Location**: [`src/tools/schema.rs:60-67`](src/tools/schema.rs:60-67) + +```rust +fn lark_special_token(token_ids: &HashSet<u32>) -> String { + if token_ids.is_empty() { + return String::new(); + } + // Join multiple token IDs with "," inside a single <...> group + let ids: Vec<String> = token_ids.iter().map(|id| format!("[{}]", id)).collect(); + format!("<{}>", ids.join(",")) +} +``` + +### Example Output + +With start token ID `151657` and end token ID `151658`: +``` +tool_call: <[151657]> ws json_array ws <[151658]> +``` + +Without token IDs (fallback to strings): +``` +tool_call: "<tool_call>" ws json_array ws "</tool_call>" +``` + +### Tests Verifying Token ID Support + +1. **`test_build_json_tool_lark_grammar_qwen3_with_token_ids`** (lines 764-783) + - Verifies that token IDs are converted to `<[token_id]>` syntax + - Checks that the generated grammar contains the correct token IDs + +2. **`test_lark_special_token_single_id`** (lines 785-791) + - Tests single token ID conversion: `<[151657]>` + +3. **`test_lark_special_token_multiple_ids`** (lines 793-800) + - Tests multiple token IDs: `<[151657],[151658]>` + +4. 
**`test_lark_special_token_empty`** (lines 802-807) + - Tests empty token ID set returns empty string + +--- + +## 13. GRAMMAR CONSTRUCTION DETAILS + +### The `rule_N` Indirection Problem + +**Old behavior** (incorrect): +```lark +start: rule_0 | rule_1 +rule_0: TEXT +TEXT: /(.|[\\n\\r])*/ +rule_1: +tool_call: ... +``` + +This creates an unnecessary level of indirection where: +1. `rule_0` references `TEXT` (which is actually a terminal) +2. `rule_1` is empty and just wraps `tool_call` +3. The `start` rule alternates between these wrappers + +**New behavior** (correct): +```lark +start: TEXT | tool_call +TEXT: /((?s).)*/ # (?s) enables dotall mode +tool_call: ... +``` + +This produces a flat grammar where: +1. `start` directly alternates between `TEXT` and `tool_call` +2. No intermediate `rule_N` wrappers +3. Cleaner, more efficient grammar + +### Implementation + +The fix is implemented in two helper functions: + +1. **`parse_lark_grammar()`**: Extracts the start rule's RHS and remaining rules +2. **`combine_rules()`**: Merges rules while handling duplicates + +### Performance Impact + +- **Smaller grammar size**: No intermediate rule wrappers +- **Faster parsing**: Fewer Earley items to track +- **Lower memory usage**: Simpler grammar structure +- **Better error messages**: Direct alternation is easier to understand + +--- + +--- + +## 15. EOS TOKEN MANDATE FOR FREEFORM GENERATION + +### Why EOS Tokens Are Required + +For freeform TEXT generation (non-constrained), the grammar MUST include an explicit EOS token boundary. Without it: + +1. **Mask Preemption**: The `compute_mask()` function returns token IDs before generation, but the TEXT pattern `/((?s).)*/` allows any character including EOS +2. **No Finite Boundary**: Without an explicit EOS in the grammar, the lexer has no way to know when to stop accepting TEXT tokens +3. 
**Run-on Generation**: The model continues generating indefinitely until max_tokens is reached + +### Correct TEXT Pattern with EOS + +```lark +start: text_with_eos +text_with_eos: TEXT eos? +TEXT: /(?s:.*)/ +eos: <[248044]> | <[248046]> | <[248048]> | <[248052]> | <[248054]> | <[248050]> +``` + +### Incorrect TEXT Pattern (causes run-on generation) + +```lark +start: text +text: TEXT +TEXT: /(?s:.*)/ +``` + +### Implementation in chat_text_expression_with_eos() + +The function [`chat_text_expression_with_eos()`](src/utils/guidance.rs:485) in guidance.rs properly handles this: + +```rust +pub fn chat_text_expression_with_eos(special_tokens: &SpecialTokens) -> String { + let eos_token_ids = special_tokens.eos_ids(); + + let eos_pattern = if eos_token_ids.is_empty() { + // Fallback to stop="" when no EOS tokens available + r#"start: text +text[stop=""]: /((?s).*?)/"#.to_string() + } else if eos_token_ids.len() == 1 { + format!(r#"start: text_with_eos +text_with_eos: TEXT eos? +TEXT: /(?s:.*)/ +eos: <[{}]>"#, eos_token_ids[0]) + } else { + let ids: Vec<String> = eos_token_ids.iter().map(|id| format!("<[{}]>", id)).collect(); + let eos_alternation = ids.join(" | "); + format!(r#"start: text_with_eos +text_with_eos: TEXT eos? +TEXT: /(?s:.*)/ +eos: {}"#, eos_alternation) + }; + + eos_pattern +} +``` + +### Key Points + +1. **Use `chat_text_expression_with_eos()`** instead of `chat_text_expression()` when freeform TEXT is needed +2. **Always include EOS tokens** in the grammar for unconstrained generation +3. **Avoid `stop=""` patterns** - they don't work reliably with llguidance's lexer; the function above uses one only as a last-resort fallback when no EOS token IDs are available +4. **Use `eos?` syntax** to make EOS optional at the end of text + +--- + +## 16. 
QWEN CODER TOOL PARSING ISSUES + +### Problem: XML Nested Tags in Parameter Values + +Qwen Coder models output tool parameters with XML-style nested tags like: + +```xml +<‌tool_call> +<‌function=edit_file> +<‌parameter=file_path>/tmp/a.rs +<‌parameter=new_string> +fn a() { let x = vec![1,2,3]; } +<‌/function> +<‌/tool_call> +``` + +### The Grammar Challenge + +The current grammar uses regex patterns to match XML content: + +```lark +value_4_0: /[^<]*(<[^\/][^<]*)*?/ +``` + +This pattern: +- **Allows**: Regular text and non-closing angle brackets +- **Fails on**: Content that contains `<` followed by a `/` (closing tag) - **premature termination** +- **Fails on**: Content that contains `<` followed by a letter (opening tag) - **false positive tag detection** + +### Why This Is Fundamentally Broken + +1. **Look-Ahead Limitation**: Earley regex cannot express "match until you see `<‌/parameter>` but allow `<‌function=...>` in between" +2. **Finite Masks**: llguidance precomputes token masks, but nested XML requires unbounded context +3. 
**No Recursive Grammars**: Lark cannot express recursive XML structures in a way that maps to token masks + +### Current Workarounds + +#### Option A: Conservative Text Matching (Current) +```lark +value: /[^<]*(<[^\/][^<]*)*?/ +``` +- **Pros**: Works for most cases, finite mask possible +- **Cons**: Fails if parameter content contains `<` character + +#### Option B: Allow Any Character Until Strict End +```lark +value: /(?s).*?(?=<‌\/parameter>)/ +``` +- **Pros**: Handles `<` in content +- **Cons**: Requires look-ahead, impossible with finite masks + +#### Option C: Use Token IDs Instead of String Literals +```lark +value: /[^<]*(<[0-9]+[^\/][^<]*)*?/ +``` +- **Pros**: More flexible pattern matching +- **Cons**: Still can't handle nested `<` characters + +### The Real Problem + +``` +<‌parameter=new_string> ← Start of parameter +fn a() { let x = vec![1,2,3]; } ← Contains '<' characters +<‌/parameter> ← End of parameter (but mask sees '<' and thinks it's a tag) +``` + +When the mask encounters `<`, it: +1. Checks if the next character is `/` → closing tag +2. Checks if the next character is a letter → opening tag +3. **Preempts content generation** before the actual closing `<‌/parameter>` tag is reached + +### Recommended Solution: Avoid XML Parameters for Tool Calls + +Instead of nested XML like: + +```xml +<‌function=edit_file> +<‌parameter=file_path>/tmp/a.rs +<‌parameter=new_string>fn a() { let x = vec![1,2,3]; } +<‌/function> +``` + +Use **flat JSON** format: + +```json +{ + "name": "edit_file", + "arguments": { + "file_path": "/tmp/a.rs", + "new_string": "fn a() { let x = vec![1,2,3]; }" + } +} +``` + +### Grammar for JSON Tool Calls (Recommended) + +```lark +start: tool_call +tool_call: "<‌tool_call>" ws json_array ws "<‌/tool_call>" +json_array: "[" obj ("," obj)* "]" +obj: obj_search | obj_edit +obj_search: %json {"type":"object","properties":{...}} +obj_edit: %json {"type":"object","properties":{...}} +ws: /[ \t\r\n]+/ +``` + +This avoids the XML nested tag problem entirely by: +1. 
Using `%json` directives for structured parameter schemas +2. Not exposing parameter tags in the grammar +3. Letting the parser validate JSON structure instead of regex + +### Summary + +| Issue | Current State | Recommendation | +|-------|--------------|----------------| +| Nested XML tags | Cannot be expressed in finite mask grammar | Use JSON instead | +| `<` in parameter values | Causes premature termination | Avoid XML format | +| Look-ahead parsing | Not supported by llguidance lexer | Use simpler grammar structures | +| | + +Last updated: 2026-03-07 \ No newline at end of file diff --git a/example/special-tokens-extraction/Cargo.lock b/example/special-tokens-extraction/Cargo.lock new file mode 100644 index 00000000..28fcef0f --- /dev/null +++ b/example/special-tokens-extraction/Cargo.lock @@ -0,0 +1,5851 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom 0.3.4", + "once_cell", + "serde", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "akin" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1763692fc1416554cf051efc56a3de5595eca47299d731cc5c2b583adf8b4d2f" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] + +[[package]] +name = "async-trait" +version = "0.1.89" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "attention-rs" +version = "0.4.1" +source = "git+https://github.com/guoqingbao/attention.rs.git?rev=af0b475#af0b4755bed845873b793877eae43f3198e46665" +dependencies = [ + "candle-core", + "candle-nn", + "half", + "parking_lot", + "rayon", + "serde", + "serde_json", + "tracing", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "base64" +version = "0.13.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +dependencies = [ + "serde_core", +] + +[[package]] +name = "block2" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdeb9d870516001442e364c5220d3574d2da8dc765554b4a617230d33fa58ef5" +dependencies = [ + "objc2", +] + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "byteorder-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "candle-core" +version = "0.8.3" +source = "git+https://github.com/guoqingbao/candle.git?rev=1e9d1a9#1e9d1a9cd8d99a5f12a7dd7dc467aaa1bd46847a" +dependencies = [ + "byteorder", + "gemm 0.17.1", + "half", + "memmap2", + "num-traits", + "num_cpus", + "rand 0.9.2", + "rand_distr", + "rayon", + "safetensors", + "thiserror 1.0.69", + "ug", + "yoke 0.7.5", + "zip", +] + +[[package]] +name = "candle-nn" +version = "0.8.3" +source = "git+https://github.com/guoqingbao/candle.git?rev=1e9d1a9#1e9d1a9cd8d99a5f12a7dd7dc467aaa1bd46847a" +dependencies = [ + "candle-core", + "half", + "num-traits", + "rayon", + "safetensors", + "serde", + "thiserror 1.0.69", +] + +[[package]] +name = "castaway" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a" +dependencies = [ + "rustversion", +] + +[[package]] +name = "cc" +version = "1.2.56" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link 0.2.1", +] + +[[package]] +name = "clap" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "clap_lex" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" + +[[package]] +name = "color_quant" +version = "1.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "colored" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "compact_str" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "serde", + "static_assertions", +] + +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys 0.59.0", +] + +[[package]] +name = "convert_case" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "633458d4ef8c78b72454de2d54fd6ab2e60f9e02be22f3c6104cdc8a4e0fceb9" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = 
"core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crossterm" 
+version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67" +dependencies = [ + "bitflags 1.3.2", + "crossterm_winapi", + "libc", + "mio 0.8.11", + "parking_lot", + "signal-hook", + "signal-hook-mio", + "winapi", +] + +[[package]] +name = "crossterm" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" +dependencies = [ + "bitflags 2.11.0", + "crossterm_winapi", + "mio 1.1.1", + "parking_lot", + "rustix 0.38.44", + "serde", + "signal-hook", + "signal-hook-mio", + "winapi", +] + +[[package]] +name = "crossterm" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b" +dependencies = [ + "bitflags 2.11.0", + "crossterm_winapi", + "derive_more 2.1.1", + "document-features", + "mio 1.1.1", + "parking_lot", + "rustix 1.1.4", + "signal-hook", + "signal-hook-mio", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" +dependencies = [ + "winapi", +] + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "ctrlc" +version = "3.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0b1fab2ae45819af2d0731d60f2afe17227ebb1a1538a236da84c93e9a60162" +dependencies = [ + "dispatch2", + "nix", + "windows-sys 0.61.2", +] + +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core 0.20.11", + "darling_macro 0.20.11", +] + +[[package]] +name = "darling" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +dependencies = [ + "darling_core 0.21.3", + "darling_macro 0.21.3", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.117", +] + +[[package]] +name = "darling_core" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.117", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core 0.20.11", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "darling_macro" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" +dependencies = [ + "darling_core 0.21.3", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "dary_heap" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04" +dependencies = [ + "serde", +] + +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", + "serde_core", +] + +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling 0.20.11", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn 2.0.117", +] + +[[package]] +name = "derive_more" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05" +dependencies = [ + "derive_more-impl 1.0.0", +] + +[[package]] +name = "derive_more" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134" +dependencies = [ + "derive_more-impl 2.1.1", +] + +[[package]] +name = "derive_more-impl" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "unicode-xid", 
+] + +[[package]] +name = "derive_more-impl" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn 2.0.117", +] + +[[package]] +name = "derivre" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "786c7c65c4ef0c7deb05de3005e01991612a8f09fe0844fc0969c68b90468ba8" +dependencies = [ + "anyhow", + "bytemuck", + "bytemuck_derive", + "hashbrown 0.15.5", + "regex-syntax", + "strum 0.27.2", +] + +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys 0.4.1", +] + +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys 0.5.0", +] + +[[package]] +name = "dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users 0.4.6", + "windows-sys 0.48.0", +] + +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users 0.5.2", + "windows-sys 0.61.2", +] + +[[package]] +name = "dispatch2" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38" +dependencies = [ + "bitflags 2.11.0", + "block2", + "libc", + "objc2", +] + +[[package]] +name = "displaydoc" 
+version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "doctest-file" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aac81fa3e28d21450aa4d2ac065992ba96a1d7303efbce51a95f4fd175b67562" + +[[package]] +name = "document-features" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61" +dependencies = [ + "litrs", +] + +[[package]] +name = "dyn-clone" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" + +[[package]] +name = "dyn-stack" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e53799688f5632f364f8fb387488dd05db9fe45db7011be066fc20e7027f8b" +dependencies = [ + "bytemuck", + "reborrow", +] + +[[package]] +name = "dyn-stack" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c4713e43e2886ba72b8271aa66c93d722116acf7a75555cce11dcde84388fe8" +dependencies = [ + "bytemuck", + "dyn-stack-macros", +] + +[[package]] +name = "dyn-stack-macros" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d926b4d407d372f141f93bb444696142c29d32962ccbd3531117cf3aa0bfa9" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +dependencies = [ + "serde", +] + +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "enum-as-inner" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "esaxx-rs" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" +dependencies = [ + "cc", +] + +[[package]] +name = "fancy-regex" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298" +dependencies = [ + "bit-set", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "fax" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f05de7d48f37cd6730705cbca900770cab77a89f413d23e100ad7fad7795a0ab" +dependencies = [ + "fax_derive", +] + +[[package]] +name = "fax_derive" 
+version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0aca10fb742cb43f9e7bb8467c91aa9bcb8e3ffbc6a6f7389bb93ffc920577d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "fd-lock" +version = "4.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" +dependencies = [ + "cfg-if", + "rustix 1.1.4", + "windows-sys 0.59.0", +] + +[[package]] +name = "fdeflate" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e6853b52649d4ac5c0bd02320cddc5ba956bdb407c4b75a2c6b75bf51500f8c" +dependencies = [ + "simd-adler32", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "flume" +version = "0.10.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1657b4441c3403d9f7b3409e47575237dac27b1b5726df654a6ecbf92f0f7577" +dependencies = [ + "futures-core", + "futures-sink", + "nanorand", + "pin-project", + "spin", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "slab", +] + +[[package]] +name = "fuzzy-matcher" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54614a3312934d066701a80f20f15fa3b56d67ac7722b39eea5b4c9dd1d66c94" +dependencies = [ + "thread_local", +] + +[[package]] +name = "gemm" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ab24cc62135b40090e31a76a9b2766a501979f3070fa27f689c27ec04377d32" +dependencies = [ + "dyn-stack 0.10.0", + "gemm-c32 0.17.1", + "gemm-c64 0.17.1", + "gemm-common 0.17.1", + "gemm-f16 0.17.1", + "gemm-f32 0.17.1", + "gemm-f64 0.17.1", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 10.7.0", + "seq-macro", +] + +[[package]] +name = "gemm" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab96b703d31950f1aeddded248bc95543c9efc7ac9c4a21fda8703a83ee35451" +dependencies = [ + "dyn-stack 0.13.2", + "gemm-c32 0.18.2", + "gemm-c64 0.18.2", + "gemm-common 0.18.2", + "gemm-f16 
0.18.2", + "gemm-f32 0.18.2", + "gemm-f64 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 11.6.0", + "seq-macro", +] + +[[package]] +name = "gemm-c32" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9c030d0b983d1e34a546b86e08f600c11696fde16199f971cd46c12e67512c0" +dependencies = [ + "dyn-stack 0.10.0", + "gemm-common 0.17.1", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 10.7.0", + "seq-macro", +] + +[[package]] +name = "gemm-c32" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6db9fd9f40421d00eea9dd0770045a5603b8d684654816637732463f4073847" +dependencies = [ + "dyn-stack 0.13.2", + "gemm-common 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 11.6.0", + "seq-macro", +] + +[[package]] +name = "gemm-c64" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbb5f2e79fefb9693d18e1066a557b4546cd334b226beadc68b11a8f9431852a" +dependencies = [ + "dyn-stack 0.10.0", + "gemm-common 0.17.1", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 10.7.0", + "seq-macro", +] + +[[package]] +name = "gemm-c64" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfcad8a3d35a43758330b635d02edad980c1e143dc2f21e6fd25f9e4eada8edf" +dependencies = [ + "dyn-stack 0.13.2", + "gemm-common 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 11.6.0", + "seq-macro", +] + +[[package]] +name = "gemm-common" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2e7ea062c987abcd8db95db917b4ffb4ecdfd0668471d8dc54734fdff2354e8" +dependencies = [ + "bytemuck", + "dyn-stack 0.10.0", + "half", + "num-complex", + "num-traits", + "once_cell", + "paste", + "pulp 0.18.22", + "raw-cpuid 10.7.0", + "rayon", + "seq-macro", + "sysctl 0.5.5", +] + +[[package]] +name = "gemm-common" +version = "0.18.2" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a352d4a69cbe938b9e2a9cb7a3a63b7e72f9349174a2752a558a8a563510d0f3" +dependencies = [ + "bytemuck", + "dyn-stack 0.13.2", + "half", + "libm", + "num-complex", + "num-traits", + "once_cell", + "paste", + "pulp 0.21.5", + "raw-cpuid 11.6.0", + "rayon", + "seq-macro", + "sysctl 0.6.0", +] + +[[package]] +name = "gemm-f16" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ca4c06b9b11952071d317604acb332e924e817bd891bec8dfb494168c7cedd4" +dependencies = [ + "dyn-stack 0.10.0", + "gemm-common 0.17.1", + "gemm-f32 0.17.1", + "half", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 10.7.0", + "rayon", + "seq-macro", +] + +[[package]] +name = "gemm-f16" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff95ae3259432f3c3410eaa919033cd03791d81cebd18018393dc147952e109" +dependencies = [ + "dyn-stack 0.13.2", + "gemm-common 0.18.2", + "gemm-f32 0.18.2", + "half", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 11.6.0", + "rayon", + "seq-macro", +] + +[[package]] +name = "gemm-f32" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9a69f51aaefbd9cf12d18faf273d3e982d9d711f60775645ed5c8047b4ae113" +dependencies = [ + "dyn-stack 0.10.0", + "gemm-common 0.17.1", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 10.7.0", + "seq-macro", +] + +[[package]] +name = "gemm-f32" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc8d3d4385393304f407392f754cd2dc4b315d05063f62cf09f47b58de276864" +dependencies = [ + "dyn-stack 0.13.2", + "gemm-common 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 11.6.0", + "seq-macro", +] + +[[package]] +name = "gemm-f64" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"aa397a48544fadf0b81ec8741e5c0fba0043008113f71f2034def1935645d2b0" +dependencies = [ + "dyn-stack 0.10.0", + "gemm-common 0.17.1", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 10.7.0", + "seq-macro", +] + +[[package]] +name = "gemm-f64" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35b2a4f76ce4b8b16eadc11ccf2e083252d8237c1b589558a49b0183545015bd" +dependencies = [ + "dyn-stack 0.13.2", + "gemm-common 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 11.6.0", + "seq-macro", +] + +[[package]] +name = "getopts" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "r-efi 5.3.0", + "wasip2", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + +[[package]] +name = "getset" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf0fc11e47561d47397154977bc219f4cf809b2974facc3ccb3b89e2436f912" +dependencies = [ + "proc-macro-error2", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "gif" +version = 
"0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5df2ba84018d80c213569363bdcd0c64e6933c67fe4c1d60ecf822971a3c35e" +dependencies = [ + "color_quant", + "weezl", +] + +[[package]] +name = "h2" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap 2.13.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "bytemuck", + "cfg-if", + "crunchy", + "num-traits", + "rand 0.9.2", + "rand_distr", + "zerocopy", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = 
"0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hf-hub" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97" +dependencies = [ + "dirs 6.0.0", + "futures", + "http", + "indicatif", + "libc", + "log", + "native-tls", + "num_cpus", + "rand 0.9.2", + "reqwest", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "ureq", + "windows-sys 0.60.2", +] + +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "http-range-header" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9171a2ea8a68358193d15dd5d70c1c10a2afc3e7e4c5bc92bc9f025cebd7359c" + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = 
"httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "pin-utils", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +dependencies = [ + "http", + "hyper", + "hyper-util", + "rustls", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", + "webpki-roots 1.0.6", +] + +[[package]] +name = "hyper-tls" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" +dependencies = [ + "bytes", + "http-body-util", + "hyper", + "hyper-util", + "native-tls", + "tokio", + "tokio-native-tls", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "hyper", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + "socket2", + "system-configuration", + "tokio", + "tower-service", + "tracing", + "windows-registry", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core 0.62.2", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke 0.8.1", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" 
+version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke 0.8.1", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "image" +version = "0.25.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6506c6c10786659413faa717ceebcb8f70731c0a60cbae39795fdf114519c1a" +dependencies = [ + "bytemuck", + "byteorder-lite", + "color_quant", + "gif", + "image-webp", + "moxcms", + "num-traits", + "png", + "tiff", + "zune-core 0.5.1", + "zune-jpeg 0.5.12", +] + +[[package]] +name = "image-webp" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"525e9ff3e1a4be2fbea1fdf0e98686a6d98b4d8f937e1bf7402245af1909e8c3" +dependencies = [ + "byteorder-lite", + "quick-error", +] + +[[package]] +name = "include_dir" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "923d117408f1e49d914f1a379a309cffe4f18c05cf4e3d12e613a15fc81bd0dd" +dependencies = [ + "include_dir_macros", +] + +[[package]] +name = "include_dir_macros" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cab85a7ed0bd5f0e76d93846e0147172bed2e2d3f859bcc33a8d9699cad1a75" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", + "serde", +] + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + "serde_core", +] + +[[package]] +name = "indicatif" +version = "0.17.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" +dependencies = [ + "console", + "number_prefix", + "portable-atomic", + "unicode-width", + "web-time", +] + +[[package]] +name = "inquire" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6654738b8024300cf062d04a1c13c10c8e2cea598ec1c47dc9b6641159429756" +dependencies = [ + "bitflags 2.11.0", + "crossterm 0.29.0", + "dyn-clone", + "fuzzy-matcher", + "unicode-segmentation", + "unicode-width", +] + +[[package]] +name = "interprocess" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6be5e5c847dbdb44564bd85294740d031f4f8aeb3464e5375ef7141f7538db69" +dependencies = [ + "doctest-file", + "libc", + "recvmsg", + "widestring", + "windows-sys 0.52.0", +] + +[[package]] +name = "ipnet" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" + +[[package]] +name = "iri-string" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "is-macro" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d57a3e447e24c22647738e4607f1df1e0ec6f72e16182c4cd199f647cdfb0e4" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + 
+[[package]] +name = "js-sys" +version = "0.3.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lalrpop-util" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.182" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" + +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link 0.2.1", +] + +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "libredox" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" +dependencies = [ + "libc", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + +[[package]] +name = "litrs" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" + +[[package]] +name = "llguidance" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6bd9b883ab54097b0651668e5b590be1c7ca906500db40d4de74522382e011" +dependencies = [ + "anyhow", + "derivre", + "indexmap 2.13.0", + "regex-syntax", + "serde", + "serde_json", + "toktrie", +] + +[[package]] +name = "local-ip-address" +version = "0.6.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79ef8c257c92ade496781a32a581d43e3d512cf8ce714ecf04ea80f93ed0ff4a" +dependencies = [ + "libc", + "neli", + "windows-sys 0.61.2", +] + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" + +[[package]] +name = "macro_rules_attribute" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65049d7923698040cd0b1ddcced9b0eb14dd22c5f86ae59c3740eab64a676520" +dependencies = [ + 
"macro_rules_attribute-proc_macro", + "paste", +] + +[[package]] +name = "macro_rules_attribute-proc_macro" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30" + +[[package]] +name = "malachite" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fbdf9cb251732db30a7200ebb6ae5d22fe8e11397364416617d2c2cf0c51cb5" +dependencies = [ + "malachite-base", + "malachite-nz", + "malachite-q", +] + +[[package]] +name = "malachite-base" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ea0ed76adf7defc1a92240b5c36d5368cfe9251640dcce5bd2d0b7c1fd87aeb" +dependencies = [ + "hashbrown 0.14.5", + "itertools 0.11.0", + "libm", + "ryu", +] + +[[package]] +name = "malachite-bigint" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d149aaa2965d70381709d9df4c7ee1fc0de1c614a4efc2ee356f5e43d68749f8" +dependencies = [ + "derive_more 1.0.0", + "malachite", + "num-integer", + "num-traits", + "paste", +] + +[[package]] +name = "malachite-nz" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34a79feebb2bc9aa7762047c8e5495269a367da6b5a90a99882a0aeeac1841f7" +dependencies = [ + "itertools 0.11.0", + "libm", + "malachite-base", +] + +[[package]] +name = "malachite-q" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50f235d5747b1256b47620f5640c2a17a88c7569eebdf27cd9cb130e1a619191" +dependencies = [ + "itertools 0.11.0", + "malachite-base", + "malachite-nz", +] + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "matchit" +version = "0.7.3" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "memmap2" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" +dependencies = [ + "libc", + "stable_deref_trait", +] + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "mime_guess" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" +dependencies = [ + "mime", + "unicase", +] + +[[package]] +name = "minijinja" +version = "2.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ea5ea1e90055f200af6b8e52a4a34e05e77e7fee953a9fb40c631efdc43cab1" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "minijinja-contrib" +version = "2.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2fce60cb2e26ba7ddd485c8f5d3d635535e465c195bfb4af85971b428a985d0" +dependencies = [ + "minijinja", + "serde", +] + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + 
+[[package]] +name = "mio" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +dependencies = [ + "libc", + "log", + "wasi", + "windows-sys 0.48.0", +] + +[[package]] +name = "mio" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +dependencies = [ + "libc", + "log", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "monostate" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3341a273f6c9d5bef1908f17b7267bbab0e95c9bf69a0d4dcf8e9e1b2c76ef67" +dependencies = [ + "monostate-impl", + "serde", + "serde_core", +] + +[[package]] +name = "monostate-impl" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4db6d5580af57bf992f59068d4ea26fd518574ff48d7639b255a36f9de6e7e9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "moxcms" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac9557c559cd6fc9867e122e20d2cbefc9ca29d80d027a8e39310920ed2f0a97" +dependencies = [ + "num-traits", + "pxfm", +] + +[[package]] +name = "nanorand" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "native-tls" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "neli" +version = "0.7.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "22f9786d56d972959e1408b6a93be6af13b9c1392036c5c1fafa08a1b0c6ee87" +dependencies = [ + "bitflags 2.11.0", + "byteorder", + "derive_builder", + "getset", + "libc", + "log", + "neli-proc-macros", + "parking_lot", +] + +[[package]] +name = "neli-proc-macros" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05d8d08c6e98f20a62417478ebf7be8e1425ec9acecc6f63e22da633f6b71609" +dependencies = [ + "either", + "proc-macro2", + "quote", + "serde", + "syn 2.0.117", +] + +[[package]] +name = "nix" +version = "0.31.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d6d0705320c1e6ba1d912b5e37cf18071b6c2e9b7fa8215a1e8a7651966f5d3" +dependencies = [ + "bitflags 2.11.0", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "ntapi" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" +dependencies = [ + "winapi", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "bytemuck", + "num-traits", +] + +[[package]] +name = "num-conv" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "num_enum" +version = "0.7.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + +[[package]] +name = "objc2" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a12a8ed07aefc768292f076dc3ac8c48f3781c8f2d5851dd3d98950e8c5a89f" +dependencies = [ + "objc2-encode", +] + +[[package]] +name = "objc2-core-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" +dependencies = [ + "bitflags 2.11.0", +] + +[[package]] +name = "objc2-encode" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33" + +[[package]] +name = "objc2-io-kit" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15" +dependencies = [ + "libc", + "objc2-core-foundation", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "onig" +version = "6.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0" +dependencies = [ + "bitflags 2.11.0", + "libc", + "once_cell", + "onig_sys", +] + +[[package]] +name = "onig_sys" +version = "69.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d5bf1f5f8b7f6727dc" +dependencies = [ + "cc", + "pkg-config", +] + +[[package]] +name = "openai-protocol" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "563b4fe4e24b74c2c78c4645e613042b63317bc02d3be00427802c5f1eb236c6" +dependencies = [ + "bitflags 2.11.0", + "chrono", + "rand 0.9.2", + "serde", + "serde_json", + "serde_with", + "tokio", + "tracing", + "validator", +] + +[[package]] +name = "openssl" +version = "0.10.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" +dependencies = [ + "bitflags 2.11.0", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + +[[package]] +name = "openssl-sys" +version = "0.9.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" +dependencies = [ + "cc", + "libc", + 
"pkg-config", + "vcpkg", +] + +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link 0.2.1", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared", + "rand 0.8.5", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project" +version = "1.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "png" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60769b8b31b2a9f263dae2776c37b1b28ae246943cf719eb6946a1db05128a61" +dependencies = [ + "bitflags 2.11.0", + "crc32fast", + "fdeflate", + "flate2", + "miniz_oxide", +] + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.117", +] + +[[package]] +name = "proc-macro-crate" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "proc-macro-error2" +version = "2.0.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802" +dependencies = [ + "proc-macro-error-attr2", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pulp" +version = "0.18.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0a01a0dc67cf4558d279f0c25b0962bd08fc6dec0137699eae304103e882fe6" +dependencies = [ + "bytemuck", + "libm", + "num-complex", + "reborrow", +] + +[[package]] +name = "pulp" +version = "0.21.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b86df24f0a7ddd5e4b95c94fc9ed8a98f1ca94d3b01bdce2824097e7835907" +dependencies = [ + "bytemuck", + "cfg-if", + "libm", + "num-complex", + "reborrow", + "version_check", +] + +[[package]] +name = "pxfm" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5a041e753da8b807c9255f28de81879c78c876392ff2469cde94799b2896b9d" + +[[package]] +name = "quick-error" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" + +[[package]] +name = "quinn" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" +dependencies = [ + "bytes", + "cfg_aliases", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash 2.1.1", + "rustls", + "socket2", + "thiserror 2.0.18", + "tokio", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-proto" +version = "0.11.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" +dependencies = [ + "bytes", + "getrandom 0.3.4", + "lru-slab", + "rand 0.9.2", + "ring", + "rustc-hash 2.1.1", + "rustls", + "rustls-pki-types", + "slab", + "thiserror 2.0.18", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2", + "tracing", + "windows-sys 0.60.2", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + 
"rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rand_distr" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" +dependencies = [ + "num-traits", + "rand 0.9.2", +] + +[[package]] +name = "raw-cpuid" +version = "10.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "raw-cpuid" +version = "11.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186" +dependencies = [ + "bitflags 2.11.0", +] + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-cond" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2964d0cf57a3e7a06e8183d14a8b527195c706b7983549cd5462d5aa3747438f" +dependencies = [ + "either", + "itertools 0.14.0", + 
"rayon", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "reborrow" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03251193000f4bd3b042892be858ee50e8b3719f2b08e5833ac4353724632430" + +[[package]] +name = "recvmsg" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3edd4d5d42c92f0a659926464d4cce56b562761267ecf0f469d85b7de384175" + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags 2.11.0", +] + +[[package]] +name = "redox_users" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + "getrandom 0.2.17", + "libredox", + "thiserror 1.0.69", +] + +[[package]] +name = "redox_users" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" +dependencies = [ + "getrandom 0.2.17", + "libredox", + "thiserror 2.0.18", +] + +[[package]] +name = "reedline" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5cdfab7494d13ebfb6ce64828648518205d3ce8541ef1f94a27887f29d2d50b" +dependencies = [ + "chrono", + "crossterm 0.28.1", + "fd-lock", + "itertools 0.13.0", + "nu-ansi-term", + "serde", + "strip-ansi-escapes", + "strum 0.26.3", + "strum_macros 0.26.4", + "thiserror 2.0.18", + "unicode-segmentation", + "unicode-width", +] + +[[package]] +name = "ref-cast" +version = "1.0.25" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "reqwest" +version = "0.12.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +dependencies = [ + "base64 0.22.1", + "bytes", + "encoding_rs", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-tls", + "hyper-util", + "js-sys", + "log", + "mime", + "native-tls", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-native-tls", + "tokio-rustls", + "tokio-util", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + 
"wasm-bindgen-futures", + "wasm-streams", + "web-sys", + "webpki-roots 1.0.6", +] + +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.17", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "rmp" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ba8be72d372b2c9b35542551678538b562e7cf86c3315773cae48dfbfe7790c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "rmp-serde" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72f81bee8c8ef9b577d1681a70ebbc962c232461e397b22c208c43c04b67a155" +dependencies = [ + "rmp", + "serde", +] + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustchatui" +version = "0.2.12" +source = "git+https://github.com/guoqingbao/rustchatui.git?rev=68caad9#68caad9153d0b68401c585fa02c04fd4de177b37" +dependencies = [ + "anyhow", + "axum", + "clap", + "colored", + "include_dir", + "inquire", + "local-ip-address", + "mime_guess", + "serde", + "tokio", + "tower", + "tower-http", +] + +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags 2.11.0", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags 2.11.0", + "errno", + "libc", + "linux-raw-sys 0.12.1", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls" +version = "0.23.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" +dependencies = [ + "log", + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-pki-types" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +dependencies = [ + "web-time", + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + +[[package]] +name = "rustpython-ast" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cdaf8ee5c1473b993b398c174641d3aa9da847af36e8d5eb8291930b72f31a5" +dependencies = [ + "is-macro", + "malachite-bigint", + "rustpython-parser-core", + "static_assertions", +] + +[[package]] +name = "rustpython-parser" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "868f724daac0caf9bd36d38caf45819905193a901e8f1c983345a68e18fb2abb" +dependencies = [ + "anyhow", + "is-macro", + "itertools 0.11.0", + "lalrpop-util", + "log", + "malachite-bigint", + "num-traits", + "phf", + 
"phf_codegen", + "rustc-hash 1.1.0", + "rustpython-ast", + "rustpython-parser-core", + "tiny-keccak", + "unic-emoji-char", + "unic-ucd-ident", + "unicode_names2", +] + +[[package]] +name = "rustpython-parser-core" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4b6c12fa273825edc7bccd9a734f0ad5ba4b8a2f4da5ff7efe946f066d0f4ad" +dependencies = [ + "is-macro", + "memchr", + "rustpython-parser-vendored", +] + +[[package]] +name = "rustpython-parser-vendored" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04fcea49a4630a3a5d940f4d514dc4f575ed63c14c3e3ed07146634aed7f67a6" +dependencies = [ + "memchr", + "once_cell", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "safetensors" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44560c11236a6130a46ce36c836a62936dc81ebf8c36a37947423571be0e55b6" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "schannel" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "schemars" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" +dependencies = [ + "dyn-clone", + "schemars_derive", + "serde", + "serde_json", +] + +[[package]] +name = "schemars" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" +dependencies = [ + "dyn-clone", + "ref-cast", + "serde", + "serde_json", +] + +[[package]] +name = "schemars" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" +dependencies = [ + "dyn-clone", + "ref-cast", + "serde", + "serde_json", +] + +[[package]] +name = "schemars_derive" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.117", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "security-framework" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" +dependencies = [ + "bitflags 2.11.0", + "core-foundation 0.10.1", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde-big-array" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11fc7cc2c76d73e0f27ee52abbd64eec84d46f370c88371120433196934e4b7f" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "indexmap 2.13.0", + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_with" +version = "3.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "381b283ce7bc6b476d903296fb59d0d36633652b633b27f64db4fb46dcbfc3b9" +dependencies = [ + "base64 0.22.1", + "chrono", + "hex", + "indexmap 1.9.3", + "indexmap 2.13.0", + "schemars 0.9.0", + "schemars 1.2.1", + "serde_core", + "serde_json", + "serde_with_macros", + "time", +] + +[[package]] +name = "serde_with_macros" +version = "3.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6d4e30573c8cb306ed6ab1dca8423eec9a463ea0e155f45399455e0368b27e0" +dependencies = [ + "darling 0.21.3", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d881a16cf4426aa584979d30bd82cb33429027e42122b169753d6ef1085ed6e2" +dependencies = [ + "libc", + "signal-hook-registry", +] + +[[package]] +name = "signal-hook-mio" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b75a19a7a740b25bc7944bdee6172368f988763b744e3d4dfe753f6b4ece40cc" +dependencies = [ + "libc", + "mio 0.8.11", + "mio 1.1.1", + "signal-hook", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "socks" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b" +dependencies = [ + "byteorder", + "libc", + "winapi", +] + +[[package]] +name = "special-tokens-extraction" +version = "0.1.0" +dependencies = [ + "vllm-rs", +] + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" 
+dependencies = [ + "lock_api", +] + +[[package]] +name = "spm_precompiled" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326" +dependencies = [ + "base64 0.13.1", + "nom", + "serde", + "unicode-segmentation", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strip-ansi-escapes" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a8f8038e7e7969abb3f1b7c2a811225e9296da208539e0f79c5251d6cac0025" +dependencies = [ + "vte", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" + +[[package]] +name = "strum" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" +dependencies = [ + "strum_macros 0.27.2", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.117", +] + +[[package]] +name = "strum_macros" +version = "0.27.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "sysctl" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea" +dependencies = [ + "bitflags 2.11.0", + "byteorder", + "enum-as-inner", + "libc", + "thiserror 1.0.69", + "walkdir", +] + +[[package]] +name = "sysctl" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc" +dependencies = [ + "bitflags 2.11.0", + "byteorder", 
+ "enum-as-inner", + "libc", + "thiserror 1.0.69", + "walkdir", +] + +[[package]] +name = "sysinfo" +version = "0.37.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16607d5caffd1c07ce073528f9ed972d88db15dd44023fa57142963be3feb11f" +dependencies = [ + "libc", + "memchr", + "ntapi", + "objc2-core-foundation", + "objc2-io-kit", + "windows", +] + +[[package]] +name = "system-configuration" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" +dependencies = [ + "bitflags 2.11.0", + "core-foundation 0.9.4", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "tempfile" +version = "3.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" +dependencies = [ + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix 1.1.4", + "windows-sys 0.61.2", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + 
"proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "tiff" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af9605de7fee8d9551863fd692cce7637f548dbd9db9180fcc07ccc6d26c336f" +dependencies = [ + "fax", + "flate2", + "half", + "quick-error", + "weezl", + "zune-jpeg 0.4.21", +] + +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tinyvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokenizers" +version = "0.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a620b996116a59e184c2fa2dfd8251ea34a36d0a514758c6f966386bd2e03476" +dependencies = [ + "ahash", + "aho-corasick", + "compact_str", + "dary_heap", + "derive_builder", + "esaxx-rs", + "fancy-regex", + "getrandom 0.3.4", + "hf-hub", + "indicatif", + "itertools 0.14.0", + "log", + "macro_rules_attribute", + "monostate", + "onig", + "paste", + "rand 0.9.2", + "rayon", + "rayon-cond", + "regex", + "regex-syntax", + "serde", + "serde_json", + "spm_precompiled", + "thiserror 2.0.18", + "unicode-normalization-alignments", + "unicode-segmentation", + "unicode_categories", +] + +[[package]] +name = "tokio" +version = "1.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" +dependencies = [ + "bytes", + "libc", + "mio 1.1.1", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" +dependencies = [ + "rustls", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "toktrie" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06f161a10057ac758fe4d1bac8d86b1ce07e8a2255ee4f9eec1d408bed7be125" +dependencies = [ + "anyhow", + "bytemuck", + "bytemuck_derive", + "serde", + "serde_json", +] + +[[package]] +name = "toktrie_hf_tokenizers" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9caa3495044651ace1f33dff8d8d083142984b7014f926365ca8e003531df65" +dependencies = [ + "anyhow", + "log", + "serde", + "serde_json", + "tokenizers", + "toktrie", +] + +[[package]] +name = "toml_datetime" +version = "1.0.0+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32c2555c699578a4f59f0cc68e5116c8d7cabbd45e1409b989d4be085b53f13e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.25.4+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7193cbd0ce53dc966037f54351dbbcf0d5a642c7f0038c382ef9e677ce8c13f2" +dependencies = [ + "indexmap 2.13.0", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.9+spec-1.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4" +dependencies = [ + "winnow", +] + +[[package]] +name = "tool-parser" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b68dc2823cbc35b9033e31372afdd23d0d1ce27f593afd9c70221a86a9102f7b" +dependencies = [ + "async-trait", + "num-traits", + "openai-protocol", + "parking_lot", + "regex", + "rustpython-parser", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "tracing", +] + +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-http" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +dependencies = [ + "bitflags 2.11.0", + "bytes", + "futures-core", + "futures-util", + "http", + "http-body", + "http-body-util", + "http-range-header", + "httpdate", + "iri-string", + "mime", + "mime_guess", + "percent-encoding", + "pin-project-lite", + "tokio", + "tokio-util", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tqdm" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b316d5c2ac649ca856dacd487d0ebb94f3b746bada51355d93dd2c007ab62a2e" +dependencies = [ + "anyhow", + "crossterm 0.25.0", + "once_cell", +] + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "log", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "twox-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" +dependencies = [ + "rand 0.9.2", +] + +[[package]] +name = "ug" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03719c61a91b51541f076dfdba45caacf750b230cefaa4b32d6f5411c3f7f437" +dependencies = [ + "gemm 0.18.2", + "half", + "libloading", + "memmap2", + "num", + "num-traits", + "num_cpus", + "rayon", + "safetensors", + "serde", + "thiserror 1.0.69", + "tracing", + "yoke 0.7.5", +] + +[[package]] +name = "unic-char-property" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221" +dependencies = [ + "unic-char-range", +] + +[[package]] +name = "unic-char-range" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc" + +[[package]] +name = "unic-common" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" + +[[package]] +name = "unic-emoji-char" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d" +dependencies = [ + "unic-char-property", + "unic-char-range", + "unic-ucd-version", +] + +[[package]] +name = "unic-ucd-ident" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e230a37c0381caa9219d67cf063aa3a375ffed5bf541a452db16e744bdab6987" +dependencies = [ + "unic-char-property", + "unic-char-range", + "unic-ucd-version", +] + +[[package]] +name = "unic-ucd-version" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4" +dependencies = [ + "unic-common", +] + 
+[[package]] +name = "unicase" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-normalization-alignments" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de" +dependencies = [ + "smallvec", +] + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + +[[package]] +name = "unicode_names2" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1673eca9782c84de5f81b82e4109dcfb3611c8ba0d52930ec4a9478f547b2dd" +dependencies = [ + "phf", + "unicode_names2_generator", +] + +[[package]] +name = "unicode_names2_generator" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91e5b84611016120197efd7dc93ef76774f4e084cd73c9fb3ea4a86c570c56e" +dependencies = [ + "getopts", + "log", + 
"phf_codegen", + "rand 0.8.5", +] + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "ureq" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" +dependencies = [ + "base64 0.22.1", + "flate2", + "log", + "native-tls", + "once_cell", + "rustls", + "rustls-pki-types", + "serde", + "serde_json", + "socks", + "url", + "webpki-roots 0.26.11", +] + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "utoipa" +version = "4.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5afb1a60e207dca502682537fefcfd9921e71d0b83e9576060f09abc6efab23" +dependencies = [ + "indexmap 2.13.0", + "serde", + "serde_json", + "utoipa-gen", +] + +[[package]] +name = "utoipa-gen" +version = "4.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20c24e8ab68ff9ee746aad22d39b5535601e6416d1b0feeabf78be986a5c4392" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "regex", + "syn 2.0.117", +] + +[[package]] +name = "uuid" +version = "1.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" +dependencies = [ + "getrandom 0.4.2", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "validator" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43fb22e1a008ece370ce08a3e9e4447a910e92621bb49b85d6e48a45397e7cfa" +dependencies = [ + "idna", + "once_cell", + "regex", + "serde", + "serde_derive", + "serde_json", + "url", + "validator_derive", +] + +[[package]] +name = "validator_derive" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7df16e474ef958526d1205f6dda359fdfab79d9aa6d54bafcb92dcd07673dca" +dependencies = [ + "darling 0.20.11", + "once_cell", + "proc-macro-error2", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "vllm-rs" +version = "0.9.7" +dependencies = [ + "ahash", + "akin", + "anyhow", + "attention-rs", + "axum", + "base64 0.22.1", + "bincode", + "bytemuck", + "candle-core", + "candle-nn", + "clap", + "colored", + "crossbeam", + "ctrlc", + "dirs 5.0.1", + "either", + "flume", + "futures", + "half", + "hf-hub", + "image", + "indicatif", + "interprocess", + "itertools 0.13.0", + "lazy_static", + "llguidance", + "local-ip-address", + "minijinja", + "minijinja-contrib", + "once_cell", + "openai-protocol", + "parking_lot", + "rand 0.9.2", + "rayon", + "reedline", + 
"regex", + "reqwest", + "rmp-serde", + "rustchatui", + "schemars 0.8.22", + "serde", + "serde-big-array", + "serde_json", + "sysinfo", + "thiserror 1.0.69", + "tokenizers", + "tokio", + "toktrie", + "toktrie_hf_tokenizers", + "tool-parser", + "tower-http", + "tqdm", + "tracing", + "tracing-subscriber", + "twox-hash", + "url", + "utoipa", + "uuid", +] + +[[package]] +name = "vte" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "231fdcd7ef3037e8330d8e17e61011a2c244126acc0a982f4040ac3f9f0bc077" +dependencies = [ + "memchr", +] + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +dependencies 
= [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" +dependencies = [ + "cfg-if", + "futures-util", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.117", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap 2.13.0", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasm-streams" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.11.0", + "hashbrown 0.15.5", + "indexmap 2.13.0", + "semver", +] + +[[package]] +name = "web-sys" +version = "0.3.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.6", +] + +[[package]] +name = "webpki-roots" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "weezl" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28ac98ddc8b9274cb41bb4d9d4d5c425b6020c50c46f25559911905610b4a88" + +[[package]] +name = "widestring" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72069c3113ab32ab29e5584db3c6ec55d416895e60715417b5b883a357c3e471" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.61.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" +dependencies = [ + "windows-collections", + "windows-core 0.61.2", + "windows-future", + "windows-link 0.1.3", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" +dependencies = [ + "windows-core 0.61.2", +] + +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link 0.1.3", + "windows-result 0.3.4", + "windows-strings 0.4.2", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + 
"windows-implement", + "windows-interface", + "windows-link 0.2.1", + "windows-result 0.4.1", + "windows-strings 0.5.1", +] + +[[package]] +name = "windows-future" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" +dependencies = [ + "windows-core 0.61.2", + "windows-link 0.1.3", + "windows-threading", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-numerics" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" +dependencies = [ + "windows-core 0.61.2", + "windows-link 0.1.3", +] + +[[package]] +name = "windows-registry" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720" +dependencies = [ + "windows-link 0.2.1", + "windows-result 0.4.1", + "windows-strings 0.5.1", +] + +[[package]] +name = "windows-result" 
+version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link 0.1.3", +] + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link 0.2.1", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link 0.1.3", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link 0.2.1", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + +[[package]] +name = "windows-sys" 
+version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link 0.2.1", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link 0.2.1", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + +[[package]] +name = "windows-threading" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" +dependencies = [ + "windows-link 0.1.3", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = 
"windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "winnow" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" +dependencies = [ + "memchr", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap 2.13.0", + "prettyplease", + "syn 2.0.117", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = 
"0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.11.0", + "indexmap 2.13.0", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap 2.13.0", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "writeable" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive 0.7.5", + "zerofrom", +] + +[[package]] +name = "yoke" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive 0.8.1", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a789c6e490b576db9f7e6b6d661bcc9799f7c0ac8352f56ea20193b2681532e5" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f65c489a7071a749c849713807783f70672b28094011623e200cb86dcb835953" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + +[[package]] +name = "zerotrie" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke 0.8.1", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.5" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke 0.8.1", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zip" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cc23c04387f4da0374be4533ad1208cbb091d5c11d070dfef13676ad6497164" +dependencies = [ + "arbitrary", + "crc32fast", + "crossbeam-utils", + "displaydoc", + "indexmap 2.13.0", + "num_enum", + "thiserror 1.0.69", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zune-core" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a" + +[[package]] +name = "zune-core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb8a0807f7c01457d0379ba880ba6322660448ddebc890ce29bb64da71fb40f9" + +[[package]] +name = "zune-jpeg" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29ce2c8a9384ad323cf564b67da86e21d3cfdff87908bc1223ed5c99bc792713" +dependencies = [ + "zune-core 0.4.12", +] + +[[package]] +name = "zune-jpeg" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "410e9ecef634c709e3831c2cfdb8d9c32164fae1c67496d5b68fff728eec37fe" +dependencies = [ + "zune-core 0.5.1", +] diff --git a/example/special-tokens-extraction/Cargo.toml 
b/example/special-tokens-extraction/Cargo.toml new file mode 100644 index 00000000..b6945fcf --- /dev/null +++ b/example/special-tokens-extraction/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "special-tokens-extraction" +version = "0.1.0" +edition = "2024" + +[dependencies] +vllm-rs = { path = "../.." } diff --git a/example/special-tokens-extraction/README.md b/example/special-tokens-extraction/README.md new file mode 100644 index 00000000..2688520e --- /dev/null +++ b/example/special-tokens-extraction/README.md @@ -0,0 +1,195 @@ +# Special Tokens Extraction Tool + +This example tool helps developers analyze tokenizer special tokens visually using the `SpecialTokens` module from vllm.rs. + +## Purpose + +When integrating new models or tokenizer configurations, it's essential to understand: +- Which token IDs correspond to special tokens (EOS, BOS, PAD, TOOL, etc.) +- How the tokenizer encodes model-specific special tokens +- Whether custom token rules are needed for new model formats + +This tool extracts and displays all special tokens from a tokenizer file, making it easy to visualize and verify the token mapping. + +## Usage + +### Basic Usage + +```bash +# Using default tokenizer.json in current directory +cargo run --manifest-path example/special-tokens-extraction/Cargo.toml + +# Using a custom tokenizer path +cargo run --manifest-path example/special-tokens-extraction/Cargo.toml -- /path/to/tokenizer.json +``` + +### Example Output + +``` +=== Testing Tokenizer Library === + +Successfully loaded tokenizer from: tokenizer.json +Total added tokens processed. 
+ +--- EOS Tokens --- +EOS: id=2 token= +EOS: id=128001 token=<|end_of_text|> +EOS IDs: [2, 128001] +EOS Strings: ["", "<|end_of_text|>"] + +--- PAD Tokens --- +PAD: id=0 token= + +--- BOS Tokens --- +BOS: id=1 token= + +--- TOOL Tokens --- +TOOL: id=151657 token= + +--- ROLE Tokens --- +ROLE: id=128007 token=_ROLE +ROLE: id=128008 token=ROLE_ + +--- MASK Tokens --- +MASK: id=32000 token= + +--- REASONING Tokens --- +REASONING: id=1 token= + +--- OTHER Tokens --- +OTHER: id=0 token= +OTHER: id=1 token= +OTHER: id=2 token= +OTHER: id=3 token= +``` + +## Token Categories + +The tool classifies tokens into the following categories: + +| Category | Description | Example Tokens | +|----------|-------------|----------------| +| `EOS` | End of sequence tokens | ``, `<|end_of_text|>`, `` | +| `PAD` | Padding tokens | ``, `` | +| `BOS` | Beginning of sequence tokens | ``, `<|start_of_turn|>` | +| `SEP` | Separator tokens | ``, `<|separator|>` | +| `CLS` | Classification tokens | ``, `[CLS]` | +| `MASK` | Mask tokens for masking | ``, `[MASK]` | +| `TOOL` | Tool-related tokens | ``, `<|tool|>` | +| `FUNCTION` | Function tokens | ``, `<|function|>` | +| `PARAMETER` | Parameter tokens | ``, `<|parameter|>` | +| `ROLE` | Role tokens (chat templates) | `ROLE`, `ROLE_`, `<|role|>` | +| `CONTENT_TYPE` | Content type tokens | ``, `<|content_type|>` | +| `REASONING` | Reasoning/thinking tokens | ``, ``, `` | +| `OTHER` | Unmatched tokens | ``, etc. 
| + +## Understanding SpecialTokens Rules + +The `SpecialTokens` struct uses a flexible matching system based on `MatchRule`: + +### MatchRule Types + +```rust +pub enum MatchRule { + Exact(String), // Exact match: "" matches "" + StartsWith(String), // Prefix match: "<|end" matches "<|end_of_text|>" + Contains(String), // Substring match: "tool" matches "" + And(Box, Box), // Both rules must match + Or(Box, Box), // Either rule must match + Not(Box), // Rule must NOT match +} +``` + +### Default Rules + +The `default_rules()` function in `src/utils/special_tokens.rs` defines matching rules for all categories. + +## Customizing Token Rules + +To add support for new token patterns: + +1. Edit `src/utils/special_tokens.rs` +2. Add new rules to `default_rules()`: + +```rust +// Example: Add custom thinking token +(MatchRule::Contains("custom_thinking".to_string()), Category::Reasoning), + +// Example: Custom tool start token +(MatchRule::Exact("".to_string()), Category::Tool), +``` + +3. Test with the extraction tool: +```bash +cargo run --example special-tokens-extraction -- /path/to/tokenizer.json +``` + +## Integration with vllm.rs + +The `SpecialTokens` struct is used throughout vllm.rs: + +### Engine Initialization + +```rust +// src/core/engine.rs:474 +let special_tokens = Arc::new(SpecialTokens::new(&tokenizer)); +``` + +### Scheduler Usage + +```rust +// src/core/scheduler.rs:135 +eos_token_id: special_tokens.eos_ids(), +``` + +### Guidance/LLG Usage + +```rust +// src/utils/guidance.rs:485 +pub fn chat_text_expression_with_eos(special_tokens: &SpecialTokens) -> String { + let eos_token_ids = special_tokens.eos_ids(); + // ... build TEXT pattern with EOS tokens +} +``` + +## Troubleshooting + +### Common Issues + +1. **Empty token list** + - Check that the tokenizer file path is correct + - Verify the tokenizer has added tokens (some tokenizers use vocab tokens) + +2. 
**Tokens not classified correctly** + - Add custom rules in `src/utils/special_tokens.rs` + - Use `search(None, Some("substring"))` to debug token matching + +3. **Token ID collisions** + - The `SpecialTokens::new()` implementation deduplicates by token ID + - Check with `search(Some(token_id), None)` to verify uniqueness + +### Debugging with the Search API + +```rust +// Search by ID +let matches = special_tokens.search(Some(151657), None); +for m in matches { + println!("ID 151657: {} -> {}", m.category, m.content); +} + +// Search by substring +let matches = special_tokens.search(None, Some("tool")); +for m in matches { + println!("Contains 'tool': {} -> {}", m.category, m.content); +} +``` + +## File Reference + +- **Source**: `src/utils/special_tokens.rs` +- **Example**: `example/special-tokens-extraction/src/main.rs` +- **Tests**: `src/utils/special_tokens.rs` (test module at end of file) + +## License + +This example is part of the vllm.rs project. diff --git a/example/special-tokens-extraction/src/main.rs b/example/special-tokens-extraction/src/main.rs new file mode 100644 index 00000000..009961ad --- /dev/null +++ b/example/special-tokens-extraction/src/main.rs @@ -0,0 +1,82 @@ +use vllm_rs::utils::special_tokens::SpecialTokens; +use std::env; + +fn main() { + println!("=== Testing Tokenizer Library ===\n"); + + // Path to our mock tokenizer file + let args: Vec = env::args().collect(); + let tokenizer_path = if args.len() > 1 { + args[1].clone() + } else { + "./tokenizer.json".to_string() + }; + + let special = SpecialTokens::new_from_file(&tokenizer_path); + + let reasoning_matches = special.search(None, Some("think")); + for m in reasoning_matches { + println!("Search Result - Category: {:?}, ID: {}, Content: {}", m.category, m.id, m.content); + } + + println!("Successfully loaded tokenizer from: {}", tokenizer_path); + println!("Total added tokens processed.\n"); + + // Test Eos + println!("--- EOS Tokens ---"); + for (id, s) in special.eos_tokens() { 
+ println!("EOS: id={} token={}", id, s); + } + println!("EOS IDs: {:?}", special.eos_ids()); + println!("EOS Strings: {:?}", special.eos_strings()); + println!(); + + // Test Pad + println!("--- PAD Tokens ---"); + for (id, s) in special.pad_tokens() { + println!("PAD: id={} token={}", id, s); + } + println!(); + + // Test Bos + println!("--- BOS Tokens ---"); + for (id, s) in special.bos_tokens() { + println!("BOS: id={} token={}", id, s); + } + println!(); + + // Test Tool + println!("--- TOOL Tokens ---"); + for (id, s) in special.tool_tokens() { + println!("TOOL: id={} token={}", id, s); + } + println!(); + + // Test Role + println!("--- ROLE Tokens ---"); + for (id, s) in special.role_tokens() { + println!("ROLE: id={} token={}", id, s); + } + println!(); + + // Test Mask + println!("--- MASK Tokens ---"); + for (id, s) in special.mask_tokens() { + println!("MASK: id={} token={}", id, s); + } + println!(); + + // Test Reasoning + println!("--- REASONING Tokens ---"); + for (id, s) in special.reasoning_tokens() { + println!("REASONING: id={} token={}", id, s); + } + println!(); + + // Test Other (Tokens that didn't match specific rules above, e.g., ) + println!("--- OTHER Tokens ---"); + for (id, s) in special.other_tokens() { + println!("OTHER: id={} token={}", id, s); + } +} + diff --git a/src/api.rs b/src/api.rs index 83b3bbf4..26870fd0 100644 --- a/src/api.rs +++ b/src/api.rs @@ -151,6 +151,8 @@ impl EngineBuilder { None, self.pd_server_prefix_cache_ratio, self.pd_client_prefix_cache_ratio, + false, // allow_constraint_api + false, // enable_tool_grammar ); let dtype = self.dtype.clone().map(dtype_to_str); diff --git a/src/core/block_manager.rs b/src/core/block_manager.rs index 1f9bfefa..dcafc49e 100644 --- a/src/core/block_manager.rs +++ b/src/core/block_manager.rs @@ -571,6 +571,11 @@ impl BlockManager { .map_or(0, |cache| cache.cached_blocks()) } + /// Get a reference to the runners Arc + pub fn get_runners(&self) -> &Arc> { + &self.runners + } + /// 
Returns how many tokens of `seq` are already cached in the prefix cache. /// Used to decide whether to do local prefill vs transfer to PD server. pub fn get_prefix_cache_match_tokens(&mut self, seq: &Sequence) -> usize { @@ -689,6 +694,11 @@ impl BlockManager { self.block_size } + /// Get the block size + pub fn block_size(&self) -> usize { + self.block_size + } + pub fn get_cpu_swap_usage(&self) -> f32 { let total_cpu_blocks = self.cpu_blocks.len(); (total_cpu_blocks - self.free_cpu_block_ids.len()) as f32 / total_cpu_blocks as f32 @@ -951,4 +961,67 @@ impl BlockManager { } } } + + /// Rollback a sequence to a specific token position, releasing blocks beyond that point. + /// This is used for speculative decoding mismatch recovery. + pub fn rollback_to_seq_tokens(&mut self, seq: &mut Sequence, target_tokens: usize) -> Result<()> { + let current_tokens = seq.len(); + if target_tokens >= current_tokens { + return Ok(()); // Nothing to rollback + } + + // Calculate how many blocks to release + let target_blocks = target_tokens.div_ceil(self.block_size); + let blocks_to_release = current_tokens.div_ceil(self.block_size) - target_blocks; + + if blocks_to_release > 0 { + // Release blocks from the end + let released: Vec = seq.block_table.drain(target_blocks..).collect(); + for &block_id in &released { + let block_id_usize = block_id as usize; + self.decrement_block_ref(block_id_usize); + } + } + + // Update cached token count + let target_full_blocks = target_tokens / self.block_size; + seq.num_cached_tokens = target_full_blocks * self.block_size; + + // Update prefix cache if enabled + if self.prefix_cache.is_some() { + // Extract prefix_cache to avoid borrow conflicts + let mut prefix_cache = self.prefix_cache.take().unwrap(); + + // Calculate which hashes correspond to released blocks + let target_full_blocks = target_tokens / self.block_size; + let current_full_blocks = current_tokens / self.block_size; + + // Collect hashes to remove + let mut hashes_to_remove = 
Vec::new(); + + for block_idx in target_full_blocks..current_full_blocks { + // Get the block_id for this position before release + if let Some(&block_id_u32) = seq.block_table.get(block_idx) { + let block_id = block_id_u32 as usize; + + // Find the hash associated with this block_id + if let Some(hash) = prefix_cache.hash_for_block(block_id) { + hashes_to_remove.push(hash); + } + } + } + + // Remove hashes from prefix cache and mamba mappings + for hash in hashes_to_remove { + if prefix_cache.remove_hash(&hash).is_some() { + self.invalidate_mamba_prefix_hash(hash); + } + } + + // Put prefix_cache back + self.prefix_cache = Some(prefix_cache); + } + + Ok(()) + } } diff --git a/src/core/engine.rs b/src/core/engine.rs index 0812b5a3..af629fa6 100644 --- a/src/core/engine.rs +++ b/src/core/engine.rs @@ -20,7 +20,8 @@ use crate::transfer::PdRole; use crate::transfer::Transfer; use crate::utils::chat_template::Message; use crate::utils::config::{EngineConfig, EosTokenId, ModelType, SamplingParams}; -use crate::utils::guidance::load_toktrie_from_path; +use crate::utils::special_tokens::SpecialTokens; +use crate::utils::guidance::{build_llg_factory, load_toktrie_from_path}; use crate::utils::heartbeat::heartbeat_worker; use crate::utils::image::{get_image_config, ImageData, ImageProcessConfig}; use crate::utils::kvcache_allocator::KVCacheAllocator; @@ -100,6 +101,9 @@ pub struct LLMEngine { pub model_type: ModelType, pub tool_config: ToolConfig, pub img_cfg: Option, + /// SpecialTokens parsed once at engine initialization + /// Contains EOS, BOS, and other special token IDs and their string representations + pub special_tokens: Arc, } impl LLMEngine { @@ -107,9 +111,23 @@ impl LLMEngine { pub fn new(econfig: &EngineConfig, dtype: DType) -> Result>> { let (model_pathes, is_gguf, mut config, config_tokenizer, tokenizer, mut generation_cfg) = init_config_tokenizer(econfig)?; - let toktrie = load_toktrie_from_path(&model_pathes.get_tokenizer_filename()).map(Arc::new); + let 
toktrie = match load_toktrie_from_path(&model_pathes.get_tokenizer_filename()) { + Ok(trie) => Some(Arc::new(trie)), + Err(e) => { + crate::log_warn!("Failed to load tokenizer trie: {}", e); + None + } + }; + let llg_factory = match build_llg_factory(tokenizer.clone(), config.vocab_size) { + Ok(f) => Some(f), + Err(e) => { + crate::log_warn!("Failed to build llguidance factory: {}", e); + None + } + }; + if toktrie.is_none() { - crate::log_warn!("Guided decoding disabled: tokenizer trie unavailable."); + crate::log_warn!("Guided decoding (legacy) disabled: tokenizer trie unavailable."); } let stop_flag = Arc::new(AtomicBool::new(false)); @@ -121,10 +139,23 @@ impl LLMEngine { // In case config file missing bos and eos configuratioin config.apply_generation_cfg(generation_cfg.as_ref()); if config.eos_token_id.is_none() { - if let Some(eos) = &config_tokenizer.eos_token { - if let Some(token) = tokenizer.get_vocab(true).get(eos).copied() { - config.eos_token_id = Some(EosTokenId::Single(token)); - }; + if let Some(eos_entry) = &config_tokenizer.eos_token { + // Extract all EOS tokens from the tokenizer vocabulary + let mut eos_tokens: Vec = Vec::new(); + for eos_token_str in &eos_entry.tokens { + if let Some(token) = tokenizer.get_vocab(true).get(eos_token_str).copied() { + if !eos_tokens.contains(&token) { + eos_tokens.push(token); + } + } + } + if !eos_tokens.is_empty() { + config.eos_token_id = if eos_tokens.len() == 1 { + Some(EosTokenId::Single(eos_tokens[0])) + } else { + Some(EosTokenId::Multiple(eos_tokens)) + }; + } } } assert!( @@ -209,7 +240,7 @@ impl LLMEngine { device.clone(), reporter, transfer, - toktrie.clone(), + llg_factory.clone(), None, )?; @@ -417,7 +448,7 @@ impl LLMEngine { None, config_tokenizer.chat_template.clone(), config_tokenizer.bos_token.clone(), - config_tokenizer.eos_token.clone(), + config_tokenizer.eos_token.as_ref().map(|e| e.tokens.join("|")), None, true, true, @@ -439,6 +470,9 @@ impl LLMEngine { "default".to_string() }; + // 
Initialize SpecialTokens once at engine startup + let special_tokens = Arc::new(SpecialTokens::new(&tokenizer)); + let engine = Arc::new(RwLock::new(Self { runners, scheduler, @@ -461,6 +495,7 @@ impl LLMEngine { tool_config, img_cfg, model_name, + special_tokens, })); Self::start_engine(engine.clone()); Ok(engine) @@ -1576,4 +1611,9 @@ impl LLMEngine { pub fn get_chat_template(&self) -> ChatTemplate { self.template.clone() } + + pub fn template_supports_tools(&self) -> bool { + self.template.supports_tools() + } + } diff --git a/src/core/mod.rs b/src/core/mod.rs index 95563506..d4abf4ef 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -78,6 +78,42 @@ macro_rules! log_warn { }; } +#[macro_export] +macro_rules! log_debug { + ($($arg:tt)*) => { + { + #[cfg(feature = "python")] + { + use colored::Colorize; + let s = format!($($arg)*); + println!("{}", String::from(s).truecolor(100, 100, 100)); + } + #[cfg(not(feature = "python"))] + { + tracing::debug!($($arg)*); + } + } + }; +} + +#[macro_export] +macro_rules! log_trace { + ($($arg:tt)*) => { + { + #[cfg(feature = "python")] + { + use colored::Colorize; + let s = format!($($arg)*); + println!("{}", String::from(s).truecolor(50, 50, 50)); + } + #[cfg(not(feature = "python"))] + { + tracing::trace!($($arg)*); + } + } + }; +} + #[macro_export] macro_rules! 
log_error { ($($arg:tt)*) => { diff --git a/src/core/prefix_cache.rs b/src/core/prefix_cache.rs index ea9c41b5..323ef271 100644 --- a/src/core/prefix_cache.rs +++ b/src/core/prefix_cache.rs @@ -323,6 +323,36 @@ impl PrefixCache { self.access_counter } + /// Remove a hash from the cache and update parent/children bookkeeping + /// Returns the removed block_id if found + pub fn remove_hash(&mut self, hash: &u64) -> Option { + let entry = self.entries.remove(hash)?; + let block_id = entry.block_id; + + // Update parent's children count + if let Some(parent_hash) = entry.parent { + if let Some(parent_entry) = self.entries.get_mut(&parent_hash) { + parent_entry.children -= 1; + if parent_entry.children == 0 { + self.leaf_set.insert(parent_hash); + self.touch_leaf(parent_hash); + } + } + } + + // Remove from leaf set + self.leaf_set.remove(&hash); + + Some(block_id) + } + + /// Find the hash associated with a block_id + pub fn hash_for_block(&self, block_id: usize) -> Option { + self.entries.iter() + .find(|(_, entry)| entry.block_id == block_id) + .map(|(hash, _)| *hash) + } + fn hash_block(parent_hash: u64, tokens: &[u32]) -> u64 { let mut hasher = std::collections::hash_map::DefaultHasher::new(); parent_hash.hash(&mut hasher); diff --git a/src/core/runner.rs b/src/core/runner.rs index 2c260121..92f41c4e 100644 --- a/src/core/runner.rs +++ b/src/core/runner.rs @@ -8,7 +8,9 @@ use crate::transfer::Transfer; use crate::utils::graph::{ planned_graph_capture_batches, CudaGraphFn, CudaGraphWrapper, GraphCapturer, ModelFn, }; -use crate::utils::guidance::GuidanceState; +use crate::utils::guidance::{GuidanceState, ParserFactory}; +// use crate::utils::guidance::{GuidanceState, ParserFactory, batch_mask_bias, early_exit_validate}; +use toktrie::SimpleVob; use crate::utils::image::compute_image_slice; use crate::utils::logits_processor::{LogitsProcessor, Sampling}; use crate::utils::progress::ProgressLike; @@ -35,10 +37,9 @@ use attention_rs::InputMetadata; use 
candle_core::{DType, Device, Result, Tensor, D}; use interprocess::local_socket::Stream as LocalStream; use parking_lot::RwLock; -use std::collections::{HashMap, HashSet}; +use std::collections::{hash_map::Entry, HashMap, HashSet}; use std::rc::Rc; use std::sync::{Arc, Mutex, MutexGuard}; -use toktrie::TokTrie; /// Cached sampling parameters computed once during prefill, reused during decode #[derive(Clone, Debug)] @@ -94,6 +95,9 @@ pub struct ModelRunner { seq_tokens: RwLock>>, restored_prefix_sequences: RwLock>, guidance_states: RwLock>, + guidance_failed: RwLock>, + guidance_mismatch: RwLock>, + llg_factory: Option>, transfer: Option>, /// Whether this runner is on the first rank (for logging) is_first_rank: bool, @@ -163,7 +167,7 @@ impl ModelRunner { device: Device, reporter: Arc>>, transfer: Option>, - toktrie: Option>, + llg_factory: Option>, stream: Option, ) -> Result { let model = crate::build_model!( @@ -417,12 +421,15 @@ impl ModelRunner { cached_sampling: RwLock::new(None), seq_tokens: RwLock::new(HashMap::new()), restored_prefix_sequences: RwLock::new(HashSet::new()), - guidance_states: RwLock::new(HashMap::new()), - transfer, - is_first_rank: comm.rank() == 0, - model_type, - }) - } + guidance_states: RwLock::new(HashMap::new()), + guidance_failed: RwLock::new(HashSet::new()), + guidance_mismatch: RwLock::new(HashSet::new()), + llg_factory, + transfer, + is_first_rank: comm.rank() == 0, + model_type, + }) + } pub fn get_kv_cache(&self) -> MutexGuard<'_, Vec<(Tensor, Tensor)>> { loop { @@ -1215,10 +1222,205 @@ impl ModelRunner { logits.to_owned() }; - let tokens = self + let logits = if let Some(factory) = &self.llg_factory { + let mut guidance_states = self.guidance_states.write(); + let mut guidance_failed = self.guidance_failed.write(); + let mut guidance_mismatch = self.guidance_mismatch.write(); + let mut modified = false; + let vocab_size = logits.dim(1)?; + + // We only materialize logits on CPU if at least one constraint mask applies. 
+ + // We'll collect masks first to minimize holding locks or complex logic inside the loop + let mut masks: Vec<(usize, usize, SimpleVob)> = Vec::new(); // (seq_index, seq_id, mask) + + for (i, id) in seq_ids.iter().enumerate() { + let sampling_params = match &seqs { + Seqs::SeqRefs(refs) => &refs[i].sampling_params, + Seqs::DecodeVec(vec) => &vec[i].sampling_params, + }; + + if guidance_failed.contains(id) { + continue; + } + + // Use grammar directly from sampling_params + let grammar = match sampling_params.grammar.as_ref() { + Some(g) => g, + None => continue, + }; + + let state = match guidance_states.entry(*id) { + Entry::Occupied(entry) => entry.into_mut(), + Entry::Vacant(entry) => { + match GuidanceState::new_from_grammar(factory.clone(), grammar) { + Ok(state) => entry.insert(state), + Err(err) => { + guidance_failed.insert(*id); + crate::log_warn!( + "[Seq {}] Failed to create guidance state: {}. Disabling constraints for this sequence.", + id, + err + ); + continue; + } + } + } + }; + + if let Ok(Some(mask)) = state.compute_mask() { + masks.push((i, *id, mask)); + modified = true; + } + } + + if modified { + // Now we must convert to Vec, modify, and update logits + let mut logits_vec = logits.flatten_all()?.to_vec1::()?; + + for (seq_idx, seq_id, mask) in masks { + let start = seq_idx * vocab_size; + let end = start + vocab_size; + let row = &mut logits_vec[start..end]; + let mask_len = mask.len(); + + // Apply mask: set disallowed to -inf + // This iterates entire vocab, but check is fast + if mask_len == 0 { + if guidance_failed.insert(seq_id) { + crate::log_warn!( + "[Seq {}] Guidance mask length is 0. Disabling constraints for this sequence.", + seq_id + ); + } + continue; + } + + if mask_len != vocab_size && guidance_mismatch.insert(seq_id) { + crate::log_warn!( + "[Seq {}] Guidance mask size {} does not match vocab size {}. 
Clamping mask application.", + seq_id, + mask_len, + vocab_size + ); + // Snapshot is captured when constraint is first applied in GuidanceState::new() + // Rollback is handled via Matcher::rollback() in GuidanceState::rollback_to() + } + + let apply_len = std::cmp::min(vocab_size, mask_len); + for tok in 0..apply_len { + if !mask.is_allowed(tok as u32) { + row[tok] = f32::NEG_INFINITY; + } + } + if mask_len < vocab_size { + for tok in mask_len..vocab_size { + row[tok] = f32::NEG_INFINITY; + } + } + } + Tensor::from_vec(logits_vec, logits.shape(), &self.device)? + /* + // Use optimized batch mask bias function + batch_mask_bias( + &logits, + &masks.iter().map(|(seq_idx, _, mask)| (*seq_idx, mask.clone())).collect::>(), + vocab_size, + )? + */ + } else { + logits + } + + } else { + logits + }; + + let mut tokens = self .logit_processor .sample_with_strategy(&logits, &cached_params.sampling)?; + // Re-sample tokens that fail validation (hybrid approach) + if let Some(_factory) = &self.llg_factory { + let mut guidance_states = self.guidance_states.write(); + for (seq_idx, seq_id) in seq_ids.iter().enumerate() { + let token = tokens[seq_idx]; + + crate::log_trace!("[llg] Processing seq {} (idx {}): token {}", seq_id, seq_idx, token); + + if let Some(state) = guidance_states.get_mut(seq_id) { + if state.is_finished() { + crate::log_trace!("[llg] Matcher is stopped for seq {}, skipping validation", seq_id); + continue; + } + + let valid = state.validate_token(token); + crate::log_trace!("[llg] Token {} validation result: {}", token, valid); + + if valid { + crate::log_trace!("[llg] Token {} is valid, consuming for seq {}", token, seq_id); + let _ = state.commit_token(token); + } else { + crate::log_debug!("[llg] Token {} is invalid, computing mask for seq {}", token, seq_id); + let mask = match state.compute_mask_or_eos() { + Ok(m) => m, + Err(e) => { + crate::log_error!( + "[llg] Unable to compute mask for token {} due to {}", token, e + ); + continue; + } + }; + + 
crate::log_debug!("[llg] Applying bias to logits for seq {}", seq_id); + + // Memory-efficient: use flat vector with slice operations + let vocab_size = logits.dim(1)?; + let row_start = seq_idx * vocab_size; + let row_end = row_start + vocab_size; + + let mut row_vec = logits.clone().flatten_all()?.to_vec1::()?; + let row = &mut row_vec[row_start..row_end]; + + // Direct mask application: set disallowed tokens to -inf + for tok in 0..vocab_size { + if !mask.is_allowed(tok as u32) { + row[tok] = f32::NEG_INFINITY; + } + } + + // Create tensor with correct shape for re-sampling + let biased_tensor = Tensor::from_vec(row_vec, logits.shape(), logits.device())?; + + crate::log_debug!("[llg] Re-sampling with biased logits for seq {}", seq_id); + + // Use sample_with_strategy with proper cached params + let re_sampled = self.logit_processor.sample_with_strategy(&biased_tensor, &cached_params.sampling)?; + tokens[seq_idx] = re_sampled[seq_idx]; + + crate::log_debug!("[llg] Consuming re-sampled token {} for seq {}", tokens[seq_idx], seq_id); + let _ = state.commit_token(tokens[seq_idx]); + } + } else { + crate::log_debug!("[llg] No guidance state for seq {}", seq_id); + } + /* + // Use optimized early exit validation + let vocab_size = logits.dim(1)?; + early_exit_validate( + &mut guidance_states, + &seq_ids, + &mut tokens, + &logits, + vocab_size, + factory, + &cached_params.sampling, + &self.logit_processor, + )? 
+ */ + } + } + // Track tokens for sequences when penalties are enabled if has_any_penalty { let mut seq_tokens = self.seq_tokens.write(); @@ -1233,6 +1435,8 @@ impl ModelRunner { } } } + + // Token commits are now done inline in the re-sample loop below Ok(tokens) } @@ -1411,19 +1615,55 @@ impl ModelRunner { pub fn clear_blocks(&self, _block_ids: Vec) -> Result { Ok(true) - // fn cache_clear(gpu_cache: &Vec<(Tensor, Tensor)>, block_ids: &Vec) -> Result { - // if gpu_cache.is_empty() || block_ids.is_empty() { - // return Ok(true); - // } + } - // for i in 0..gpu_cache.len() { - // cache::clear_blocks(&gpu_cache[i].0, block_ids)?; - // cache::clear_blocks(&gpu_cache[i].1, block_ids)?; - // } + /// Validate a sequence's output_ids against the grammar using llguidance + /// Returns Some(valid_token_count) if guidance exists, None if no constraint + pub fn validate_sequence_for_grammar(&self, seq_id: usize, output_ids: &[u32]) -> Option { + let mut guidance_states = self.guidance_states.write(); + let state = guidance_states.get_mut(&seq_id)?; + match state.validate_tokens(output_ids) { + Some(count) => Some(count), + None => None, + } + } - // Ok(true) - // } + /// Rollback guidance state for a sequence + /// This is called from Scheduler::rollback_sequence() to reset llguidance FSM state + pub fn rollback_sequence_for_guidance(&self, seq_id: usize, target_tokens: usize) -> Result<()> { + let mut guidance_states = self.guidance_states.write(); + let mut guidance_failed = self.guidance_failed.write(); + let mut guidance_mismatch = self.guidance_mismatch.write(); + + if let Some(state) = guidance_states.get_mut(&seq_id) { + // Calculate byte position (approx 4 bytes per token) + let target_bytes = target_tokens * 4; + match state.rollback_to(target_tokens, target_bytes) { + Ok(()) => {} + Err(e) => { + return Err(candle_core::Error::Msg(format!("Guidance rollback failed: {}", e))); + } + } + } + + // Clear failed and mismatch status for re-initialization + 
guidance_failed.remove(&seq_id); + guidance_mismatch.remove(&seq_id); + + Ok(()) + } - // cache_clear(&*self.get_kv_cache(), &block_ids) + /// Fast-forward and consume tokens guaranteed to be accepted by the grammar + /// This is used for speculative decoding optimization + pub fn consume_ff_tokens(&self, seq_id: usize) -> Result> { + let mut guidance_states = self.guidance_states.write(); + if let Some(state) = guidance_states.get_mut(&seq_id) { + match state.consume_ff_tokens() { + Ok(tokens) => Ok(tokens), + Err(e) => Err(candle_core::Error::Msg(format!("FF tokens failed: {}", e))), + } + } else { + Ok(Vec::new()) + } } } diff --git a/src/core/scheduler.rs b/src/core/scheduler.rs index 9c786d4a..2719b9d0 100644 --- a/src/core/scheduler.rs +++ b/src/core/scheduler.rs @@ -5,11 +5,11 @@ use super::{ prefix_cache::PrefixCacheConfig, sequence::{Sequence, SequenceStatus}, }; +use crate::tools::parser::prefix_could_be_tool; use crate::transfer::{PdConfig, PdRole}; use crate::utils::config::{Config, EngineConfig, EosTokenId}; use candle_core::Result; use parking_lot::RwLock; -use regex::Regex; use std::collections::VecDeque; use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; @@ -21,17 +21,16 @@ pub struct Scheduler { transferred: VecDeque, pub block_manager: BlockManager, next_seq_id: usize, - eos_token_id: Vec, + /// Token IDs that represent the end of sequence + pub eos_token_id: Vec, /// Token IDs that represent the end of a tool call (e.g., tokens) tool_call_end_token_ids: Vec, /// Token IDs that represent the start of a tool call (used to avoid false end matches) tool_call_start_token_ids: Vec, /// Token ID for } character (used for JSON tool call detection) - json_end_token_id: Option, + json_end_token_ids: Vec, /// Tokenizer for decoding output to check JSON tool call patterns tokenizer: Option>, - /// Regex for detecting JSON tool calls - tool_call_regex: Regex, cfg: EngineConfig, pd_config: Option, is_last_prefill: bool, @@ -141,11 +140,8 @@ impl 
Scheduler { // Tool call end tokens will be set by engine after tokenizer is initialized tool_call_end_token_ids: Vec::new(), tool_call_start_token_ids: Vec::new(), - json_end_token_id: None, + json_end_token_ids: Vec::new(), tokenizer: None, - // Regex to match JSON tool call format: {"name": "...", "arguments": {...}} - // We use (?s) to allow dot matching newlines - tool_call_regex: Regex::new(r#"(?s)\{\s*"name"\s*:.*"arguments"\s*:.*\}\s*$"#).unwrap(), cfg: econfig.clone(), pd_config: econfig.pd_config.clone(), is_last_prefill: false, @@ -164,13 +160,21 @@ impl Scheduler { /// Set tokenizer for JSON tool call detection (called by engine after initialization) pub fn set_tokenizer(&mut self, tokenizer: Arc) { - // Get the token ID for "}" character - if let Ok(tokens) = tokenizer.encode("}", false) { - if let Some(&token_id) = tokens.get_ids().last() { - self.json_end_token_id = Some(token_id); - crate::log_info!("JSON end token ID (}}) set to: {}", token_id); + self.json_end_token_ids.clear(); + + for ch in ["}", "]"] { + if let Ok(tokens) = tokenizer.encode(ch, false) { + if let Some(&token_id) = tokens.get_ids().last() { + if !self.json_end_token_ids.contains(&token_id) { + self.json_end_token_ids.push(token_id); + } + } } } + + if !self.json_end_token_ids.is_empty() { + crate::log_info!("JSON end token IDs set to: {:?}", self.json_end_token_ids); + } self.tokenizer = Some(tokenizer); } @@ -182,6 +186,75 @@ impl Scheduler { id } + /// Check if the sequence has grammar validation failures + /// Uses ModelRunner::validate_sequence_for_grammar() to validate the entire output_ids sequence + /// Returns true if validation failed and rollback is needed + fn should_rollback_for_grammar(&mut self, seq_id: usize, output_ids: &[u32]) -> bool { + let runners = self.block_manager.get_runners(); + let runners_guard = runners.read(); + + if let RunnerType::Thread(model_runner) = &*runners_guard { + if let Some(valid_count) = 
model_runner.validate_sequence_for_grammar(seq_id, output_ids) { + return valid_count < output_ids.len(); + } + } + + false + } + + /// Rollback a sequence to a specific token position + /// This is called from postprocess() when grammar validation fails + /// The sequence is truncated and cache states are rolled back + pub fn rollback_sequence(&mut self, seq_id: usize, target_tokens: usize) -> Result<()> { + const MAX_ROLLBACK_ATTEMPTS: usize = 3; + + // Find the sequence + let seq = self.running.iter_mut() + .find(|s| s.id == seq_id) + .ok_or_else(|| candle_core::Error::msg(format!("Sequence {} not found", seq_id)))?; + + seq.guidance_rollback_count += 1; + + if seq.guidance_rollback_count > MAX_ROLLBACK_ATTEMPTS { + crate::log_error!( + "[Seq {}] Exceeded {} rollback attempts, marking as errored", + seq_id, MAX_ROLLBACK_ATTEMPTS + ); + seq.status = SequenceStatus::Finished; + return Ok(()); + } + + // Save current state as rollback snapshot (if not already saved) + if seq.rollback_snapshot.is_none() { + seq.save_rollback_snapshot(); + } + + // Get target block count + let target_blocks = target_tokens.div_ceil(self.block_manager.get_block_size()); + + // Truncate Sequence state + seq.token_ids.truncate(target_tokens); + seq.block_table.truncate(target_blocks); + seq.num_cached_tokens = target_blocks * self.block_manager.get_block_size(); + + // Rollback BlockManager (KV cache + prefix cache) + self.block_manager.rollback_to_seq_tokens(seq, target_tokens)?; + + // Rollback ModelRunner (llguidance FSM + Mamba state) + let runners = self.block_manager.get_runners().clone(); + { + let runners_guard = runners.read(); + if let RunnerType::Thread(model_runner) = &*runners_guard { + model_runner.rollback_sequence_for_guidance(seq_id, target_tokens)?; + } + } + + // Update sequence status for reprocessing + seq.status = SequenceStatus::Running; + + Ok(()) + } + pub fn is_finished(&self) -> bool { self.waiting.is_empty() && self.running.is_empty() } @@ -535,6 +608,38 @@ 
impl Scheduler { } } + // Check for grammar validation failures using llguidance + // Validate the entire output_ids sequence + let seq = &self.running[idx]; + let seq_id = seq.id; + let output_ids = seq.output_ids.clone(); + + if self.should_rollback_for_grammar(seq_id, &output_ids) { + let target_tokens = output_ids.len(); + let target_blocks = target_tokens.div_ceil(self.block_manager.get_block_size()); + let target_tokens_aligned = target_blocks * self.block_manager.get_block_size(); + + crate::log_info!( + "[Seq {}] Grammar validation failed, rolling back to {} tokens ({} blocks)", + seq_id, + target_tokens_aligned, + target_blocks + ); + + // Trigger rollback + if let Err(e) = self.rollback_sequence(seq_id, target_tokens_aligned) { + crate::log_error!( + "[Seq {}] Rollback failed: {}. Finishing sequence.", + seq_id, + e + ); + let seq = &mut self.running[idx]; + seq.status = SequenceStatus::Finished; + self.block_manager.deallocate(seq); + } + continue; + } + let matched_stop_sequence_idx = self.stop_sequence_match_index(token, &self.running[idx]); let hit_stop_sequence = matched_stop_sequence_idx.is_some(); @@ -1134,7 +1239,7 @@ impl Scheduler { /// Check if the given token is a tool call end token /// This supports both: /// 1. Explicit tool call end tokens (e.g., in XML format) - /// 2. JSON end token "}" combined with Regex validation for {..."name":..., "arguments":...} pattern + /// 2. JSON end token "}" combined with prefix_could_be_tool validation pub fn is_tool_call_end(&self, token: u32, idx: usize) -> bool { // 1. Check for explicit tool call end tokens (XML style) if self.tool_call_end_token_ids.contains(&token) { @@ -1152,19 +1257,24 @@ impl Scheduler { return true; } - // 2. Check for JSON style tool call using Regex - // This handles models like Qwen3 that output raw JSON without XML tags - if self.json_end_token_id == Some(token) { + // 2. 
Check for JSON style tool call by attempting to parse complete JSON + if self.json_end_token_ids.contains(&token) { if let Some(tokenizer) = &self.tokenizer { // Temporarily add the token to get complete output for decoding let mut temp_output = self.running[idx].output_ids.to_vec(); temp_output.push(token); if let Ok(decoded) = tokenizer.decode(&temp_output, true) { - // Check for JSON tool call pattern using Regex - // The pattern matches if the decoded string ends with a valid JSON tool call structure - if self.tool_call_regex.is_match(&decoded) { - return true; + let trimmed = decoded.trim(); + if let Ok(val) = serde_json::from_str::(trimmed) { + if val.is_object() || val.is_array() { + return true; + } + } else { + let (_could_be, is_complete) = prefix_could_be_tool(trimmed); + if is_complete { + return true; + } } } } @@ -1173,6 +1283,11 @@ impl Scheduler { false } + /// Get the EOS token IDs from the scheduler + pub fn eos_token_ids(&self) -> &[u32] { + &self.eos_token_id + } + fn stop_sequence_match_index(&self, token: u32, seq: &Sequence) -> Option { let Some(stop_sequences) = &seq.sampling_params.stop_token_ids else { return None; diff --git a/src/core/sequence.rs b/src/core/sequence.rs index d096bbe4..00de6cb2 100644 --- a/src/core/sequence.rs +++ b/src/core/sequence.rs @@ -28,6 +28,13 @@ impl fmt::Display for SequenceStatus { } } +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct RollbackSnapshot { + pub block_table: Vec, + pub num_cached_tokens: usize, + pub mamba_prefix_hash: Option, +} + #[derive(Serialize, Deserialize, Debug, Clone)] pub struct Sequence { pub id: usize, @@ -47,6 +54,10 @@ pub struct Sequence { pub is_tool_call_end: bool, pub hit_stop_sequence: bool, pub stop_sequence: Option, + /// Snapshot for rollback on speculative decoding mismatch + pub rollback_snapshot: Option, + /// Rollback counter for guidance constraints to prevent infinite loops + pub guidance_rollback_count: usize, } #[derive(Serialize, Deserialize, Debug, 
Clone)] @@ -179,6 +190,8 @@ impl Sequence { is_tool_call_end: false, hit_stop_sequence: false, stop_sequence: None, + rollback_snapshot: None, + guidance_rollback_count: 0, } } @@ -235,4 +248,22 @@ impl Sequence { pub fn clear_block_table(&mut self) { self.block_table.clear(); } + + /// Save current state as rollback snapshot + pub fn save_rollback_snapshot(&mut self) { + self.rollback_snapshot = Some(RollbackSnapshot { + block_table: self.block_table.clone(), + num_cached_tokens: self.num_cached_tokens, + mamba_prefix_hash: self.mamba_prefix_hash, + }); + } + + /// Restore from rollback snapshot + pub fn restore_from_snapshot(&mut self) { + if let Some(snapshot) = self.rollback_snapshot.take() { + self.block_table = snapshot.block_table; + self.num_cached_tokens = snapshot.num_cached_tokens; + self.mamba_prefix_hash = snapshot.mamba_prefix_hash; + } + } } diff --git a/src/main.rs b/src/main.rs index b7ac1b1f..61ce7374 100644 --- a/src/main.rs +++ b/src/main.rs @@ -182,36 +182,38 @@ async fn main() -> Result<()> { }; let econfig = EngineConfig::new( - args.model_id, - args.weight_path, - args.weight_file, - args.hf_token, - args.hf_token_path, - args.enforce_parser.clone(), - Some(std::cmp::max(max_num_seqs, prompts.len())), - None, - max_model_len, - Some(args.max_tokens), - args.isq.clone(), - Some(1), - args.device_ids.clone(), - generation_cfg, - args.seed, - Some(prefix_cache), - args.prefix_cache_max_tokens, - Some(args.fp8_kvcache), - Some(args.server || args.ui_server || !interactive), - args.cpu_mem_fold, - args.kv_fraction, - args.mamba_fraction, - pd_config, - args.mcp_command.clone(), - args.mcp_config.clone(), - args.mcp_args.clone(), - tool_prompt_template, - None, // pd_server_prefix_cache_ratio - None, // pd_client_prefix_cache_ratio - ); + args.model_id, + args.weight_path, + args.weight_file, + args.hf_token, + args.hf_token_path, + args.enforce_parser.clone(), + Some(std::cmp::max(max_num_seqs, prompts.len())), + None, + max_model_len, + 
Some(args.max_tokens), + args.isq.clone(), + Some(1), + args.device_ids.clone(), + generation_cfg, + args.seed, + Some(prefix_cache), + args.prefix_cache_max_tokens, + Some(args.fp8_kvcache), + Some(args.server || args.ui_server || !interactive), + args.cpu_mem_fold, + args.kv_fraction, + args.mamba_fraction, + pd_config, + args.mcp_command.clone(), + args.mcp_config.clone(), + args.mcp_args.clone(), + tool_prompt_template, + None, // pd_server_prefix_cache_ratio + None, // pd_client_prefix_cache_ratio + args.allow_constraint_api, + args.enable_tool_grammar, + ); let engine = LLMEngine::new(&econfig, dtype)?; if args.server || args.ui_server || args.pd_server { diff --git a/src/py/mod.rs b/src/py/mod.rs index 54d59061..51b3c640 100644 --- a/src/py/mod.rs +++ b/src/py/mod.rs @@ -7,6 +7,7 @@ use crate::transfer::{PdConfig, PdMethod, PdRole}; use crate::utils::chat_template::Message; use crate::utils::config::{EngineConfig, GenerationConfig, SamplingParams}; use crate::utils::get_dtype; +use llguidance::api::TopLevelGrammar; use parking_lot::RwLock; use pyo3::exceptions::PyStopIteration; use pyo3::exceptions::PyValueError; @@ -268,7 +269,8 @@ impl EngineConfig { fp8_kvcache=None, server_mode=None, cpu_mem_fold=None, kv_fraction=None, mamba_fraction=None, pd_config=None, mcp_command=None, mcp_config=None, mcp_args=None, tool_prompt_template=None, - pd_server_prefix_cache_ratio=None, pd_client_prefix_cache_ratio=None))] + pd_server_prefix_cache_ratio=None, pd_client_prefix_cache_ratio=None, + allow_constraint_api=false, enable_tool_grammar=false))] pub fn new( model_id: Option, weight_path: Option, @@ -299,6 +301,8 @@ impl EngineConfig { tool_prompt_template: Option, pd_server_prefix_cache_ratio: Option, pd_client_prefix_cache_ratio: Option, + allow_constraint_api: bool, + enable_tool_grammar: bool, ) -> Self { let mut device_ids = device_ids.unwrap_or_default(); if device_ids.is_empty() { @@ -342,6 +346,8 @@ impl EngineConfig { tool_prompt_template, 
pd_server_prefix_cache_ratio, pd_client_prefix_cache_ratio, + allow_constraint_api, + enable_tool_grammar, } } } @@ -351,7 +357,8 @@ impl SamplingParams { #[new] #[pyo3(signature = (temperature=None, max_tokens=None, ignore_eos=Some(false), top_k=None, top_p=None, session_id=None, - frequency_penalty=None, presence_penalty=None, thinking=None))] + frequency_penalty=None, presence_penalty=None, thinking=None, + grammar_json=None))] pub fn new( temperature: Option, max_tokens: Option, @@ -362,7 +369,13 @@ impl SamplingParams { frequency_penalty: Option, presence_penalty: Option, thinking: Option, + grammar_json: Option, ) -> Self { + // Convert grammar_json to TopLevelGrammar if present + let grammar = grammar_json.as_ref().and_then(|s| { + serde_json::from_str::(s).ok() + }); + Self { temperature, max_tokens, @@ -376,6 +389,8 @@ impl SamplingParams { stop_sequences: None, stop_token_ids: None, thinking, + grammar_json, + grammar, } } @@ -394,6 +409,24 @@ impl SamplingParams { stop_sequences: None, stop_token_ids: None, thinking: None, + grammar_json: None, + grammar: None, + } + } + + #[getter] + fn grammar_json(&self) -> Option { + self.grammar.as_ref().and_then(|g| serde_json::to_string(g).ok()) + } + + #[setter] + fn set_grammar_json(&mut self, value: Option) { + self.grammar_json = value.clone(); + // Also update grammar from JSON if provided + if let Some(ref s) = value { + self.grammar = serde_json::from_str::(s).ok(); + } else { + self.grammar = None; } } } diff --git a/src/runner/mod.rs b/src/runner/mod.rs index a22e2445..6d0497dd 100644 --- a/src/runner/mod.rs +++ b/src/runner/mod.rs @@ -10,6 +10,7 @@ use interprocess::local_socket::Stream as LocalStream; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::io::{Read, Write}; +use rmp_serde; #[derive(Serialize, Deserialize, Debug, Clone)] pub struct RunnerInitRequest { pub rank: usize, @@ -253,7 +254,7 @@ pub fn send_local( let serialized = if use_json { 
serde_json::to_vec(message).expect("JSON serialization failed") } else { - bincode::serialize(message).expect("Bincode serialization failed") + rmp_serde::to_vec(message).expect("RMP serialization failed") }; for stream in streams.iter_mut() { @@ -285,7 +286,7 @@ pub fn receive_local(stream: &mut LocalStream, use_json: bool) -> std::io::Resul let message: MessageType = if use_json { serde_json::from_slice(&serialized).expect("JSON deserialization failed") } else { - bincode::deserialize(&serialized).expect("Bincode deserialization failed") + rmp_serde::from_slice(&serialized).expect("RMP deserialization failed") }; // Send acknowledgment diff --git a/src/runner/runner.rs b/src/runner/runner.rs index ee3d8bed..470507d1 100644 --- a/src/runner/runner.rs +++ b/src/runner/runner.rs @@ -7,13 +7,14 @@ use std::io::Write; use std::rc::Rc; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; +use tokenizers::Tokenizer; use vllm_rs::core::runner::{ModelRunner, Seqs}; use vllm_rs::models::layers::distributed::Comm; use vllm_rs::models::layers::VarBuilderX; use vllm_rs::runner::{receive_local, send_local, MessageType}; use vllm_rs::transfer::PdRole; use vllm_rs::transfer::Transfer; -use vllm_rs::utils::guidance::load_toktrie_from_path; +use vllm_rs::utils::guidance::build_llg_factory; use vllm_rs::utils::heartbeat::heartbeat_worker; use vllm_rs::utils::new_device; use vllm_rs::utils::progress::{ProgressLike, ProgressReporter, RemoteProgressReporter}; @@ -134,8 +135,15 @@ fn main() -> anyhow::Result<()> { )?; let stream_kv = Some(stream.try_clone()?); let mut econfig = init_req.econfig.clone(); - let toktrie = load_toktrie_from_path(&init_req.model_pathes.get_tokenizer_filename()) - .map(Arc::new); + let tokenizer = Tokenizer::from_file(init_req.model_pathes.get_tokenizer_filename()) + .map_err(|e| anyhow::anyhow!("Failed to load tokenizer: {}", e))?; + let llg_factory = match build_llg_factory(tokenizer, init_req.config.vocab_size) { + Ok(f) => Some(f), + Err(e) 
=> { + vllm_rs::log_warn!("Failed to build llguidance factory: {}", e); + None + } + }; #[allow(unused_mut)] let mut runner = ModelRunner::new( init_req.model_type, @@ -148,7 +156,7 @@ fn main() -> anyhow::Result<()> { device, progress_reporter, transfer, - toktrie, + llg_factory, stream_kv, )?; diff --git a/src/server/mod.rs b/src/server/mod.rs index 46732b30..b55dc18d 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -1,3 +1,4 @@ +// src/server/mod.rs use clap::Parser; use serde::{Deserialize, Serialize}; pub mod claude_server; @@ -10,6 +11,7 @@ use crate::server::streaming::Streamer; use crate::transfer::PdRole; use crate::utils::chat_template::Message; use crate::utils::config::EngineConfig; +use crate::utils::guidance::TopLevelGrammarExt; use crate::utils::image::{ compute_tokens_per_image, get_tensor_raw_data, load_image_from_base64, load_image_from_url, ImageData, ImageProcessConfig, ImageProcessTrait, IMAGE_PLACEHOLDER, @@ -26,6 +28,7 @@ use parking_lot::RwLock; use rustchatui::start_ui_server; use serde_json::json; use std::collections::HashMap; +use crate::tools::schema::{schema_to_tools, ToolGrammarBuilder}; use std::path::Path; use std::sync::Arc; use tower_http::cors::{Any, CorsLayer}; @@ -42,6 +45,8 @@ pub struct ChatCompletionRequest { pub presence_penalty: Option, #[serde(alias = "enable_thinking")] pub thinking: Option, + #[serde(default, alias = "stop_sequences")] + pub stop: Option>, pub stream: Option, pub session_id: Option, /// Tools available for the model to call @@ -50,6 +55,22 @@ pub struct ChatCompletionRequest { /// How the model should choose which tool to call #[serde(default)] pub tool_choice: Option, + /// OpenAI-style response format for structured outputs + #[serde(default)] + pub response_format: Option, + /// Extra body for OpenAI-compatible clients (e.g. 
structured_outputs) + #[serde(default)] + pub extra_body: Option, + /// Direct structured_outputs for convenience (parsed from extra_body if not present) + #[serde(default, alias = "structured_outputs")] + pub structured_outputs: Option, + /// Legacy constraint field for llguidance (llg-new.diff pattern) + /// Use constraint_type to specify grammar format: "regex", "lark", "json_schema" + #[serde(alias = "grammar", default)] + pub constraint: Option, + /// Type of constraint for legacy constraint field + #[serde(default)] + pub constraint_type: Option, } pub fn resolve_engine_model_id(econfig: &EngineConfig) -> Option { @@ -100,6 +121,163 @@ impl Default for EncodingFormat { } } +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "snake_case")] +pub struct StructuredOutputs { + #[serde(default)] + pub choice: Option>, + #[serde(default)] + pub regex: Option, + #[serde(default)] + pub json: Option, + #[serde(default)] + pub grammar: Option, + #[serde(default)] + pub structural_tag: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "snake_case")] +pub struct ResponseFormatJsonSchema { + #[serde(default)] + pub name: Option, + pub schema: serde_json::Value, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "snake_case")] +pub struct ResponseFormat { + #[serde(rename = "type")] + pub format_type: String, + #[serde(default)] + pub json_schema: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "snake_case")] +pub struct ExtraBody { + #[serde(default)] + pub structured_outputs: Option, + #[serde(flatten)] + pub extra: HashMap, +} + +// TopLevelGrammar conversion functions +// Client grammars are composed via merge_top_level_grammars alongside TEXT and tool grammars. 
+ +pub fn grammar_fragment_from_structured_outputs(structured: &StructuredOutputs) -> Result> { + crate::log_debug!("[llg] grammar_fragment_from_structured_outputs() called"); + + let mut selected: Option = None; + let mut constraint_count = 0; + + if let Some(choice) = &structured.choice { + if !choice.is_empty() { + constraint_count += 1; + if constraint_count > 1 { + crate::log_error!("[llg] Multiple constraints specified - structured_outputs must set exactly one of choice, regex, json, grammar, or structural_tag"); + return Err(candle_core::Error::msg("structured_outputs must set exactly one of choice, regex, json, grammar, or structural_tag")); + } + crate::log_debug!("[llg] Building choice grammar from: {:?}", choice); + let choice_gram = crate::tools::schema::build_choice_lark_grammar(choice) + .map_err(|e| candle_core::Error::msg(e))?; + selected = Some(choice_gram); + } + } + + if let Some(regex) = &structured.regex { + constraint_count += 1; + if constraint_count > 1 { + crate::log_error!("[llg] Multiple constraints specified - structured_outputs must set exactly one of choice, regex, json, grammar, or structural_tag"); + return Err(candle_core::Error::msg("structured_outputs must set exactly one of choice, regex, json, grammar, or structural_tag")); + } + crate::log_debug!("[llg] Building regex grammar: {}", regex); + let regex_gram = TopLevelGrammarExt::from_regex_ascii(regex); + selected = Some(regex_gram); + } + + if let Some(schema) = &structured.json { + constraint_count += 1; + if constraint_count > 1 { + crate::log_error!("[llg] Multiple constraints specified - structured_outputs must set exactly one of choice, regex, json, grammar, or structural_tag"); + return Err(candle_core::Error::msg("structured_outputs must set exactly one of choice, regex, json, grammar, or structural_tag")); + } + crate::log_debug!("[llg] Building JSON schema grammar"); + let schema = crate::tools::schema::sanitize_schema_for_llguidance(schema); + let json_gram = 
TopLevelGrammarExt::from_json_schema_utf8(schema) + .map_err(|e| candle_core::Error::msg(e.to_string()))?; + selected = Some(json_gram); + } + + if let Some(grammar) = &structured.grammar { + constraint_count += 1; + if constraint_count > 1 { + crate::log_error!("[llg] Multiple constraints specified - structured_outputs must set exactly one of choice, regex, json, grammar, or structural_tag"); + return Err(candle_core::Error::msg("structured_outputs must set exactly one of choice, regex, json, grammar, or structural_tag")); + } + crate::log_debug!("[llg] Using Lark grammar from structured_outputs.grammar"); + let lark_gram = TopLevelGrammarExt::from_lark_utf8(grammar); + selected = Some(lark_gram); + } + + if let Some(tag) = &structured.structural_tag { + constraint_count += 1; + if constraint_count > 1 { + crate::log_error!("[llg] Multiple constraints specified - structured_outputs must set exactly one of choice, regex, json, grammar, or structural_tag"); + return Err(candle_core::Error::msg("structured_outputs must set exactly one of choice, regex, json, grammar, or structural_tag")); + } + crate::log_debug!("[llg] Building tool call grammar from structural_tag"); + let (start, end, schema) = crate::tools::schema::parse_structural_tag(tag) + .map_err(|e| candle_core::Error::msg(e))?; + let schema = crate::tools::schema::sanitize_schema_for_llguidance(&schema); + // Convert schema Value to Vec for build_json_tool_lark_grammar + let tools = schema_to_tools(&schema); + // structural_tag uses text-based matching, pass None for token IDs + let tool_gram = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag(&start) + .end_tag(&end) + .start_is_special(false) + .end_is_special(false) + .build_json(); + selected = Some(tool_gram); + } + + if selected.is_none() { + crate::log_error!("[llg] No constraint specified in structured_outputs - must set exactly one of choice, regex, json, grammar, or structural_tag"); + return Err(candle_core::Error::msg("structured_outputs 
must set exactly one of choice, regex, json, grammar, or structural_tag")); + } + + crate::log_info!("[llg] grammar_fragment_from_structured_outputs() completed with grammar: {:?}", selected.is_some()); + Ok(selected) +} + +pub fn grammar_fragment_from_response_format(response_format: &ResponseFormat) -> Result> { + crate::log_debug!("[llg] grammar_fragment_from_response_format() called with type: {}", response_format.format_type); + + match response_format.format_type.as_str() { + "json_schema" => { + let Some(schema) = response_format.json_schema.as_ref() else { + crate::log_error!("[llg] response_format.json_schema is required for type=json_schema"); + return Err(candle_core::Error::msg("response_format.json_schema is required")); + }; + crate::log_debug!("[llg] Building JSON schema grammar from response_format"); + let schema = crate::tools::schema::sanitize_schema_for_llguidance(&schema.schema); + let json_gram = TopLevelGrammarExt::from_json_schema_utf8(schema) + .map_err(|e| candle_core::Error::msg(e.to_string()))?; + crate::log_info!("[llg] grammar_fragment_from_response_format() completed with grammar"); + Ok(Some(json_gram)) + } + other => { + crate::log_error!("[llg] Unsupported response_format type '{}'; only 'json_schema' is supported", other); + Err(candle_core::Error::msg(format!( + "Unsupported response_format type '{}'; only 'json_schema' is supported", + other + ))) + } + } +} + #[derive(Serialize, Deserialize, Debug, Clone)] #[serde(rename_all = "snake_case")] pub enum EmbeddingStrategy { @@ -602,6 +780,14 @@ pub struct Args { /// MCP server arguments (comma-separated) #[arg(long, value_delimiter = ',', default_value = None)] pub mcp_args: Option>, + + /// Allow client-submitted constraints via HTTP API + #[arg(long, default_value = "false")] + pub allow_constraint_api: bool, + + /// Whether to automatically build LLG grammar from tools + #[arg(long, default_value = "false")] + pub enable_tool_grammar: bool, } /// Result of executing tool calls 
via MCP @@ -1249,10 +1435,7 @@ mod tests { #[test] fn test_chat_completion_tool_choice_required_parsing() { - let json = r#"{ - "messages": [{"role":"user","content":"hi"}], - "tool_choice": "required" - }"#; + let json = r#"{"messages": [{"role":"user","content":"hi"}], "tool_choice": "required"}"#; let request: ChatCompletionRequest = serde_json::from_str(json).unwrap(); assert!(matches!( request.tool_choice, @@ -1261,4 +1444,138 @@ mod tests { )) )); } + + #[test] + fn test_grammar_fragment_from_structured_outputs_choice() { + let so = StructuredOutputs { + choice: Some(vec!["option1".to_string(), "option2".to_string()]), + regex: None, + json: None, + grammar: None, + structural_tag: None, + }; + let result = grammar_fragment_from_structured_outputs(&so); + assert!(result.is_ok()); + assert!(result.unwrap().is_some()); + } + + #[test] + fn test_grammar_fragment_from_structured_outputs_json() { + let so = StructuredOutputs { + choice: None, + regex: None, + json: Some(serde_json::json!({"type": "object", "properties": {}})), + grammar: None, + structural_tag: None, + }; + let result = grammar_fragment_from_structured_outputs(&so); + assert!(result.is_ok()); + assert!(result.unwrap().is_some()); + } + + #[test] + fn test_grammar_fragment_from_structured_outputs_regex() { + let so = StructuredOutputs { + choice: None, + regex: Some("^[a-z]+$".to_string()), + json: None, + grammar: None, + structural_tag: None, + }; + let result = grammar_fragment_from_structured_outputs(&so); + assert!(result.is_ok()); + assert!(result.unwrap().is_some()); + } + + #[test] + fn test_grammar_fragment_from_structured_outputs_grammar() { + let so = StructuredOutputs { + choice: None, + regex: None, + json: None, + // Grammar without start: - that's managed by ComposedGrammar + grammar: Some("'hello' 'world'".to_string()), + structural_tag: None, + }; + let result = grammar_fragment_from_structured_outputs(&so); + assert!(result.is_ok()); + assert!(result.unwrap().is_some()); + } + + 
#[test] + fn test_grammar_fragment_from_structured_outputs_empty() { + let so = StructuredOutputs { + choice: None, + regex: None, + json: None, + grammar: None, + structural_tag: None, + }; + let result = grammar_fragment_from_structured_outputs(&so); + assert!(result.is_err()); + } + + #[test] + fn test_grammar_fragment_from_structured_outputs_too_many() { + let so = StructuredOutputs { + choice: Some(vec!["a".to_string()]), + regex: Some("b".to_string()), + json: None, + grammar: None, + structural_tag: None, + }; + let result = grammar_fragment_from_structured_outputs(&so); + assert!(result.is_err()); + } + + #[test] + fn test_grammar_fragment_from_response_format_json_schema() { + let rf = ResponseFormat { + format_type: "json_schema".to_string(), + json_schema: Some(ResponseFormatJsonSchema { + name: None, + schema: serde_json::json!({"type": "object", "properties": {}}), + }), + }; + let result = grammar_fragment_from_response_format(&rf); + assert!(result.is_ok()); + assert!(result.unwrap().is_some()); + } + + #[test] + fn test_grammar_fragment_from_response_format_missing_json_schema() { + let rf = ResponseFormat { + format_type: "json_schema".to_string(), + json_schema: None, + }; + let result = grammar_fragment_from_response_format(&rf); + assert!(result.is_err()); + } + + #[test] + fn test_grammar_fragment_from_response_format_unsupported_type() { + let rf = ResponseFormat { + format_type: "unsupported".to_string(), + json_schema: None, + }; + let result = grammar_fragment_from_response_format(&rf); + assert!(result.is_err()); + } + + #[test] + fn test_grammar_fragment_from_response_format_json_schema_composed() { + // Test that json_schema grammars pass through ComposedGrammar + let rf = ResponseFormat { + format_type: "json_schema".to_string(), + json_schema: Some(ResponseFormatJsonSchema { + name: None, + schema: serde_json::json!({"type": "object", "properties": {"test": {"type": "string"}}}), + }), + }; + let result = 
grammar_fragment_from_response_format(&rf); + assert!(result.is_ok()); + // The grammar was created via ComposedGrammar - just verify it's Some + let grammar = result.unwrap(); + assert!(grammar.is_some()); + } } diff --git a/src/server/parser.rs b/src/server/parser.rs index a95ab859..f771e1b9 100644 --- a/src/server/parser.rs +++ b/src/server/parser.rs @@ -50,6 +50,8 @@ pub struct ToolConfig { pub end_token_ids: HashSet, pub start_token_str: String, pub end_token_str: String, + pub start_is_special: bool, + pub end_is_special: bool, } impl ToolConfig { @@ -68,6 +70,8 @@ impl ToolConfig { end_token_ids: end_ids, start_token_str: "<|python_tag|>".to_string(), end_token_str: "<|eom_id|>".to_string(), + start_is_special: false, + end_is_special: false, } } ModelType::Qwen3 @@ -83,6 +87,8 @@ impl ToolConfig { end_token_ids: end_ids, start_token_str: "".to_string(), end_token_str: "".to_string(), + start_is_special: false, + end_is_special: false, } } ModelType::Mistral | ModelType::Mistral3VL => { @@ -93,6 +99,8 @@ impl ToolConfig { end_token_ids: end_ids, start_token_str: "[TOOL_CALLS]".to_string(), end_token_str: "]".to_string(), + start_is_special: false, + end_is_special: false, } } ModelType::Gemma | ModelType::Gemma3 => { @@ -102,6 +110,8 @@ impl ToolConfig { end_token_ids: end_ids, start_token_str: "".to_string(), end_token_str: "".to_string(), + start_is_special: false, + end_is_special: false, } } // Phi, GLM, Yi, StableLM, DeepSeek - use Qwen format (text-only) @@ -116,6 +126,8 @@ impl ToolConfig { end_token_ids: HashSet::new(), start_token_str: "".to_string(), end_token_str: "".to_string(), + start_is_special: false, + end_is_special: false, }, } } @@ -1046,7 +1058,7 @@ impl StreamToolParser { serde_json::from_str::>(trimmed).is_ok() } - fn parser_name_for_model(model_type: &ModelType, model_id: &str) -> &'static str { + pub fn parser_name_for_model(model_type: &ModelType, model_id: &str) -> &'static str { let model_lower = model_id.to_ascii_lowercase(); 
match model_type { ModelType::LLaMa => "llama", diff --git a/src/server/server.rs b/src/server/server.rs index eb88978a..69000061 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -1,6 +1,9 @@ // src/server/server.rs use super::logger::ChatCompletionLogger; +use crate::utils::guidance::{compose_grammars, get_lark_from_top_level_grammar, TopLevelGrammarExt}; +use llguidance::api::TopLevelGrammar; use super::{ + grammar_fragment_from_structured_outputs, grammar_fragment_from_response_format, build_messages_and_images, streaming::{ChatResponse, Streamer, StreamingStatus}, ChatResponder, DetokenizeRequest, DetokenizeResponse, EmbeddingRequest, EmbeddingResponse, @@ -16,8 +19,10 @@ use crate::server::parser::{BufferedFinalizeResult, StreamResult, StreamToolPars use crate::tools::helpers::{ build_invalid_tool_call_feedback, build_tool_schema_map, filter_tool_calls, log_tool_calls, resolve_tools, retain_tool_calls_forced_name, strict_tool_call_validation_enabled, + sanitize_tools_for_llguidance, }; use crate::tools::{ToolChoice, ToolChoiceMode}; +use crate::tools::schema::ToolGrammarBuilder; use crate::utils::config::SamplingParams; use axum::{ extract::{Json, Query, State}, @@ -32,6 +37,7 @@ use tokio::sync::watch; use tokio::task; use uuid::Uuid; + /// Helper struct to manage streaming response chunks /// Provides clean API for sending tokens, errors, and status notifications struct StreamingContext { @@ -275,14 +281,124 @@ pub async fn chat_completion( params.session_id = request.session_id.clone(); params.thinking = request.thinking.clone(); let (img_cfg, model_type, tool_config, engine_config) = { - let e = data.engine.read(); - ( - e.img_cfg.clone(), - e.model_type.clone(), - e.tool_config.clone(), - e.econfig.clone(), - ) - }; + let e = data.engine.read(); + ( + e.img_cfg.clone(), + e.model_type.clone(), + e.tool_config.clone(), + e.econfig.clone(), + ) + }; + let model_type = model_type.clone(); // Clone for later use + + // Collect all 
TopLevelGrammars from various sources + let mut constraint_grammars: Vec = Vec::new(); + + // Handle client-submitted constraints via structured_outputs or response_format + // First check top-level structured_outputs for convenience + if let Some(ref structured) = request.structured_outputs { + if engine_config.allow_constraint_api { + match grammar_fragment_from_structured_outputs(structured) { + Ok(Some(grammar)) => { + constraint_grammars.push(grammar); + crate::log_debug!("[llg] Collected constraint grammar from top-level structured_outputs"); + } + Ok(None) => { + // No constraint specified + } + Err(err) => { + crate::log_error!("[llg] Failed to parse structured_outputs: {:?}", err); + return ChatResponder::ValidationError(format!("{:?}", err)); + } + } + } else { + crate::log_warn!("[llg] Client-submitted constraints are disabled. Set --allow-constraint-api to enable."); + } + } + // Fallback to extra_body.structured_outputs for backwards compatibility + else if let Some(ref extra_body) = request.extra_body { + if let Some(ref structured) = extra_body.structured_outputs { + if engine_config.allow_constraint_api { + match grammar_fragment_from_structured_outputs(structured) { + Ok(Some(grammar)) => { + constraint_grammars.push(grammar); + crate::log_debug!("[llg] Collected constraint grammar from extra_body.structured_outputs"); + } + Ok(None) => { + // No constraint specified + } + Err(err) => { + crate::log_error!("[llg] Failed to parse structured_outputs: {:?}", err); + return ChatResponder::ValidationError(format!("{:?}", err)); + } + } + } else { + crate::log_warn!("[llg] Client-submitted constraints are disabled. 
Set --allow-constraint-api to enable."); + } + } + } + + if let Some(ref response_format) = request.response_format { + if engine_config.allow_constraint_api { + match grammar_fragment_from_response_format(response_format) { + Ok(Some(grammar)) => { + constraint_grammars.push(grammar); + crate::log_debug!("[llg] Collected constraint grammar from response_format"); + } + Ok(None) => { + // No constraint specified + } + Err(err) => { + crate::log_error!("[llg] Failed to parse response_format: {:?}", err); + return ChatResponder::ValidationError(format!("{:?}", err)); + } + } + } else { + crate::log_warn!("[llg] Client-submitted constraints are disabled. Set --allow-constraint-api to enable."); + } + } + + // Legacy constraint field (PROTECTED by allow_constraint_api flag) + if engine_config.allow_constraint_api { + if let Some(ref grammar_str) = request.constraint { + let constraint_type = request.constraint_type.as_deref().unwrap_or("regex"); + match constraint_type { + "regex" => { + let llg_grammar = TopLevelGrammarExt::from_regex_ascii(grammar_str); + constraint_grammars.push(llg_grammar); + crate::log_debug!("[llg] Generated regex constraint"); + } + "lark" => { + let llg_grammar = TopLevelGrammarExt::from_lark_utf8(grammar_str); + constraint_grammars.push(llg_grammar); + crate::log_debug!("[llg] Generated lark constraint"); + } + "json_schema" | "json" => { + match serde_json::from_str::(grammar_str) { + Ok(val) => { + match TopLevelGrammarExt::from_json_schema_utf8(val) { + Ok(llg_grammar) => { + constraint_grammars.push(llg_grammar); + crate::log_debug!("[llg] Generated json_schema constraint"); + } + Err(e) => { + crate::log_warn!("[llg] Failed to parse json_schema constraint: {:?}", e); + } + } + } + Err(e) => { + crate::log_warn!("[llg] Failed to parse json_schema constraint: {:?}", e); + } + } + } + _ => { + crate::log_warn!("[llg] Unknown constraint_type: {}", constraint_type); + } + } + } + } else { + crate::log_warn!("[llg] Client-submitted constraints 
are disabled. Set --allow-constraint-api to enable."); + } let mcp_tools = data .mcp_manager @@ -334,10 +450,17 @@ pub async fn chat_completion( } } - let tool_schemas = Arc::new(build_tool_schema_map(&resolved_tools)); + // Sanitize tools before building schema map to ensure ASCII-only tool names + let sanitized_tools = sanitize_tools_for_llguidance(&resolved_tools); + let tool_schemas = Arc::new(build_tool_schema_map(&sanitized_tools)); let has_tools = !resolved_tools.is_empty(); params.mcp_mode = if has_tools { Some(true) } else { None }; + // Compose all grammars using compose_grammars from guidance.rs + // Clone forced_tool_name for later use in retain_tool_calls_forced_name + let forced_tool_name_clone = forced_tool_name.clone(); + + if has_tools { crate::log_warn!("Tools enabled for request"); } @@ -347,8 +470,44 @@ pub async fn chat_completion( return ChatResponder::ValidationError(err); } let parser_model_id = - super::resolve_engine_model_id(&engine_config).unwrap_or_else(|| model_id.clone()); - let enforce_parser = engine_config.enforce_parser.clone(); + super::resolve_engine_model_id(&engine_config).unwrap_or_else(|| model_id.clone()); + let enforce_parser = engine_config.enforce_parser.clone(); + + // Build tool grammar based on parser type (XML for qwen_coder, JSON for others) + // Honor parser override flag (--enforce-parser) when available + let tool_parser_name = if let Some(ref enforced) = enforce_parser { + enforced.clone() + } else { + StreamToolParser::parser_name_for_model(&model_type, &parser_model_id).to_string() + }; + let use_xml_grammar = tool_parser_name == "qwen_coder"; + let tool_gram = if has_tools && engine_config.enable_tool_grammar { + let tool_gram = if use_xml_grammar { + crate::tools::schema::build_xml_tool_lark_grammar( + &sanitized_tools, + &tool_config.start_token_str, + &tool_config.end_token_str, + tool_config.start_is_special, + tool_config.end_is_special, + Some(&tool_config.start_token_ids), + 
Some(&tool_config.end_token_ids), + ) + } else { + ToolGrammarBuilder::new() + .tools(&sanitized_tools) + .start_tag(&tool_config.start_token_str) + .end_tag(&tool_config.end_token_str) + .start_is_special(tool_config.start_is_special) + .end_is_special(tool_config.end_is_special) + .start_token_ids(Some(tool_config.start_token_ids.clone())) + .end_token_ids(Some(tool_config.end_token_ids.clone())) + .build_json() + }; + crate::log_debug!("[llg] Built tool grammar (use_xml_grammar={})", use_xml_grammar); + Some(tool_gram) + } else { + None + }; let (messages, image_data) = match build_messages_and_images(&chat_messages, img_cfg.as_ref()) { Ok(output) => output, @@ -363,6 +522,28 @@ pub async fn chat_completion( .unwrap() .as_millis() as u64; + if constraint_grammars.is_empty() && !engine_config.enable_tool_grammar { + crate::log_debug!("[llg] No constraint or tool grammar - not setting guidance"); + } else { + // Get SpecialTokens from engine for building TEXT pattern with EOS bounding + let engine = data.engine.read(); + let special_tokens = &engine.special_tokens; + let llg_grammar = compose_grammars( + constraint_grammars, + tool_gram, + has_tools, + tool_choice_required, + forced_tool_name.clone(), + Some(max_tokens.clone()), + special_tokens, + ); + drop(engine); // Explicitly drop the lock guard + let lark_string = get_lark_from_top_level_grammar(&llg_grammar); + crate::log_debug!("[llg] TopLevelGrammar for SamplingParams: {:?}", &llg_grammar); + crate::log_debug!("[llg] Lark grammar string:\n{}", lark_string); + params.grammar = Some(llg_grammar); + } + if use_stream { let session_id = params.session_id.clone(); if let Some(sid) = session_id { @@ -401,7 +582,6 @@ pub async fn chat_completion( enforce_parser.clone(), ); tool_parser.set_initial_reasoning_end_marker(prefilled_reasoning_end); - let forced_tool_name = forced_tool_name.clone(); let stream_tool_schemas = tool_schemas.clone(); if let Some(ref l) = logger { l.log_start_response(); @@ -725,7 +905,7 @@ 
pub async fn chat_completion( let dropped = retain_tool_calls_forced_name( &mut pending_tool_calls, - forced_tool_name.as_deref(), + forced_tool_name_clone.as_deref(), ); if dropped > 0 { crate::log_warn!( @@ -752,7 +932,7 @@ pub async fn chat_completion( let invalid_feedback = build_invalid_tool_call_feedback( &invalid_calls, stream_tool_schemas.as_ref(), - forced_tool_name.as_deref(), + forced_tool_name_clone.as_deref(), ); let (valid_calls, invalid_feedback) = if !invalid_calls.is_empty() @@ -1019,7 +1199,7 @@ pub async fn chat_completion( .parse_complete_with_fallback(&output.decode_output) .await; let dropped = - retain_tool_calls_forced_name(&mut parsed_calls, forced_tool_name.as_deref()); + retain_tool_calls_forced_name(&mut parsed_calls, forced_tool_name_clone.as_deref()); if dropped > 0 { crate::log_warn!( "Dropped {} tool call(s) that did not match forced tool_choice", @@ -1036,7 +1216,7 @@ pub async fn chat_completion( let invalid_feedback = build_invalid_tool_call_feedback( &invalid_calls, tool_schemas.as_ref(), - forced_tool_name.as_deref(), + forced_tool_name_clone.as_deref(), ); let valid_calls = validated_calls; diff --git a/src/tools/helpers.rs b/src/tools/helpers.rs index bcd1706b..6c810098 100644 --- a/src/tools/helpers.rs +++ b/src/tools/helpers.rs @@ -34,6 +34,16 @@ pub fn strict_tool_call_validation_enabled() -> bool { }) } +pub fn sanitize_tools_for_llguidance(tools: &[Tool]) -> Vec { + tools.iter().map(sanitize_tool_schema).collect() +} + +fn sanitize_tool_schema(tool: &Tool) -> Tool { + let mut tool = tool.clone(); + tool.function.parameters = crate::tools::schema::sanitize_schema_for_llguidance(&tool.function.parameters); + tool +} + /// Build a map of tool names to their parameter schemas pub fn build_tool_schema_map(tools: &[Tool]) -> HashMap { tools diff --git a/src/tools/mod.rs b/src/tools/mod.rs index 4270fa08..8e4cbfe9 100644 --- a/src/tools/mod.rs +++ b/src/tools/mod.rs @@ -23,7 +23,7 @@ pub struct ToolBuilder { } impl ToolBuilder { 
- fn new(name: String, description: String) -> Self { + pub fn new(name: String, description: String) -> Self { Self { name, description, @@ -259,22 +259,45 @@ impl ToolFormat { let config = ToolConfig::for_model_type(model_type); let start_tag = &config.start_token_str; let end_tag = &config.end_token_str; - let rule = format!( - "MOST IMPORTANT INSTRUCTION, **MUST** FOLLOW: For each function call, you MUST wrap function name and arguments in {start_tag}{end_tag} tags.\n\n\ - Do NOT USE ANY code blocks. Required format:\n\ - {start_tag}\n\ - {{\"name\": \"\", \"arguments\": }}\n\ - {end_tag}\n\n\ - Rules:\n\ - - Wrap function name and arguments with {start_tag} and {end_tag} tags\n\ - - Always use the exact {start_tag}{end_tag} format shown above\n\ - - Do NOT USE ANY code blocks\n\ - - Tool-use must be placed **at the end** of your response (**AFTER REASONING**), **top-level**, and not nested within other tags.\n\ - - Always adhere to this format for the tool use to ensure proper parsing and execution.\n\ - - The \"name\" and \"arguments\" are necessary fields\n\ - - DO NOT call ANY functions that DOES NOT defined between and \n\ - - MUST FOLLOW the above instruction when using tool call!", - ); - rule + match model_type { + crate::utils::config::ModelType::Qwen3 + | crate::utils::config::ModelType::Qwen3MoE + | crate::utils::config::ModelType::Qwen3VL => { + format!( + "MOST IMPORTANT INSTRUCTION, **MUST** FOLLOW: For each function call, you MUST use the QwenCoder tool format.\n\n\ + Required format:\n\ + {start_tag}\n\ + >\n\ + >\n\ + ...\n\ + \n\ + {end_tag}\n\n\ + Rules:\n\ + - Wrap tool calls with {start_tag} and {end_tag}\n\ + - Use and tags\n\ + - Each value MUST be valid JSON (string/object/array/number/bool)\n\ + - Do NOT USE ANY code blocks\n\ + - Tool-use must be placed at the end of your response (after reasoning)\n\ + - Only call tools defined between and \n\ + - MUST FOLLOW the above instruction when using tool call!", + ) + } + _ => format!( + 
"MOST IMPORTANT INSTRUCTION, **MUST** FOLLOW: For each function call, you MUST wrap function name and arguments in {start_tag}{end_tag} tags.\n\n\ + Do NOT USE ANY code blocks. Required format:\n\ + {start_tag}\n\ + {{\"name\": \"\", \"arguments\": }}\n\ + {end_tag}\n\n\ + Rules:\n\ + - Wrap function name and arguments with {start_tag} and {end_tag} tags\n\ + - Always use the exact {start_tag}{end_tag} format shown above\n\ + - Do NOT USE ANY code blocks\n\ + - Tool-use must be placed **at the end** of your response (**AFTER REASONING**), **top-level**, and not nested within other tags.\n\ + - Always adhere to this format for the tool use to ensure proper parsing and execution.\n\ + - The \"name\" and \"arguments\" are necessary fields\n\ + - DO NOT call ANY functions that DOES NOT defined between and \n\ + - MUST FOLLOW the above instruction when using tool call!", + ), + } } } diff --git a/src/tools/parser.rs b/src/tools/parser.rs index 99b81c00..a8c09b76 100644 --- a/src/tools/parser.rs +++ b/src/tools/parser.rs @@ -5,7 +5,10 @@ use super::{new_tool_call, ToolCall}; use regex::Regex; -use serde_json::Value; +use serde::{de::{Deserializer, MapAccess, Visitor}}; +use serde_json::{Map, Value}; +use std::fmt; +use std::sync::OnceLock; /// Parser for extracting tool calls from model output text #[allow(dead_code)] @@ -47,12 +50,18 @@ impl ToolParser { /// Parse tool calls from model output /// Only parses tool calls from the final answer (after reasoning end markers) pub fn parse(&self, text: &str) -> Vec { - let mut calls = Vec::new(); let mut call_id = 0; // Extract only the final answer portion (after reasoning ends) let final_answer = Self::extract_final_answer(text); + // Mistral-style parsing: strip wrappers and parse JSON or JSON array. 
+ let mut calls = parse_tool_calls_from_text(&final_answer, &mut call_id); + + if !calls.is_empty() { + return calls; + } + // Try Qwen format first if let Some(qwen_calls) = self.parse_qwen_format(&final_answer, &mut call_id) { calls.extend(qwen_calls); @@ -157,10 +166,8 @@ impl ToolParser { } if let Ok(parsed) = serde_json::from_str::(trimmed) { - if let Some(call) = self.value_to_tool_call(&parsed, call_id) { - calls.push(call); - } - } + calls.extend(self.value_to_tool_call(&parsed, call_id)); + } } } } @@ -179,10 +186,11 @@ impl ToolParser { // Simple approach: try to parse the entire text as JSON first if let Ok(parsed) = serde_json::from_str::(text.trim()) { - if let Some(call) = self.value_to_tool_call(&parsed, call_id) { - return Some(vec![call]); - } - } + let parsed_calls = self.value_to_tool_call(&parsed, call_id); + if parsed_calls.is_some() { + return Some(vec![parsed_calls.unwrap()]); + } + } // Look for JSON blocks in the text let mut depth = 0; @@ -202,9 +210,7 @@ impl ToolParser { if let Some(s) = start { let json_str = &text[s..=i]; if let Ok(parsed) = serde_json::from_str::(json_str) { - if let Some(call) = self.value_to_tool_call(&parsed, call_id) { - calls.push(call); - } + calls.extend(self.value_to_tool_call(&parsed, call_id).into_iter()); } } start = None; @@ -229,9 +235,7 @@ impl ToolParser { for cap in re.captures_iter(text) { if let Some(content) = cap.get(1) { if let Ok(parsed) = serde_json::from_str::(content.as_str().trim()) { - if let Some(call) = self.value_to_tool_call(&parsed, call_id) { - calls.push(call); - } + calls.extend(self.value_to_tool_call(&parsed, call_id).into_iter()); } } } @@ -303,6 +307,300 @@ impl ToolParser { } } +// --- Mistral-style tool parsing helpers --- + +// Accept either `{...}` **or** a `"stringified { ... 
}"` +fn flexible_args<'de, D>(d: D) -> std::result::Result +where + D: Deserializer<'de>, +{ + struct ArgVisitor; + + impl<'de> Visitor<'de> for ArgVisitor { + type Value = Value; + + fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("an object or a JSON-encoded string containing an object") + } + + fn visit_map(self, mut m: M) -> std::result::Result + where + M: MapAccess<'de>, + { + let mut map = Map::new(); + while let Some((k, v)) = m.next_entry()? { + map.insert(k, v); + } + Ok(Value::Object(map)) + } + + fn visit_str(self, s: &str) -> std::result::Result + where + E: serde::de::Error, + { + serde_json::from_str(s).map_err(|e| E::custom(format!("inner JSON error: {e}"))) + } + } + + d.deserialize_any(ArgVisitor) +} + +#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] +struct CalledFunctionParameters { + #[serde(alias = "function")] + name: String, + #[serde(alias = "arguments", deserialize_with = "flexible_args")] + parameters: Value, +} + +fn contains_tool_call_prefix(prefix: &str) -> bool { + prefix.contains("") + || prefix.contains("<|tool▁call▁begin|>") + || prefix.contains("<|python_tag|>") + || prefix.contains("[TOOL_CALLS]") +} + +fn process_model_specific_message(message: &str) -> String { + static DEEPSEEK_REGEX: OnceLock = OnceLock::new(); + static QWEN_REGEX: OnceLock = OnceLock::new(); + + let deepseek_regex = DEEPSEEK_REGEX.get_or_init(|| { + Regex::new( + r"(?s)<|tool▁call▁begin|>function<|tool▁sep|>(?P[^\n]+)\n```json\n(?P.+?)\n```<|tool▁call▁end|>", + ) + .unwrap() + }); + let qwen_regex = QWEN_REGEX + .get_or_init(|| Regex::new(r"(?s)(?P.*?)").unwrap()); + + if let Some(message) = message.strip_prefix("<|python_tag|>") { + message + .strip_suffix("<|eom_id|>") + .unwrap_or(message) + .to_string() + } else if qwen_regex.is_match(message) { + if let Some(caps) = qwen_regex.captures(message) { + let inner = caps.name("inner").unwrap().as_str(); + return inner.trim().to_string(); + } + message.to_string() + } 
else if let Some(message) = message + .strip_prefix("[TOOL_CALLS][") + .and_then(|s| s.strip_suffix("]")) + { + message.to_string() + } else if deepseek_regex.find(message).is_some() { + let mut calls = Vec::new(); + for caps in deepseek_regex.captures_iter(message) { + let name = caps + .name("name") + .map(|m| m.as_str().trim().to_string()) + .unwrap_or_default(); + let json_str = caps.name("json").map(|m| m.as_str().trim()).unwrap_or("{}"); + let arguments: Value = + serde_json::from_str(json_str).unwrap_or_else(|_| Value::Object(Map::new())); + let args_str = serde_json::to_string(&arguments).unwrap_or_else(|_| "{}".to_string()); + calls.push(new_tool_call( + format!("call_{}", calls.len()), + name, + args_str, + )); + } + serde_json::to_string(&calls).unwrap_or_else(|_| message.to_string()) + } else { + message.to_string() + } +} + +fn fix_broken_json(raw: &str) -> String { + if raw.contains(r#""arguments":"{"#) { + let tmp = raw.replacen(r#""arguments":"{"#, r#""arguments":{"#, 1); + tmp.replacen(r#"}"}"#, r#"}}"#, 1) + } else { + raw.to_string() + } +} + +fn json_value_to_tool_call(value: &Value, call_id: &mut usize) -> Option { + let name = value.get("name")?.as_str()?.to_string(); + let arguments = value.get("arguments")?; + let args_str = if arguments.is_string() { + arguments.as_str().unwrap_or("{}").to_string() + } else { + serde_json::to_string(arguments).ok()? + }; + + let call = new_tool_call( + format!("call_{}", call_id), + name, + args_str, + ); + *call_id += 1; + Some(call) +} + +/// Parse tool calls from a raw message string (handles model-specific wrappers). 
+pub fn parse_tool_calls_from_text(text: &str, call_id: &mut usize) -> Vec { + // First, handle explicit wrappers (may appear multiple times) + if text.contains("") { + let mut calls = Vec::new(); + if let Ok(re) = Regex::new(r"(?s)\s*(.*?)\s*") { + for cap in re.captures_iter(text) { + if let Some(inner) = cap.get(1) { + let inner = inner.as_str().trim(); + if let Ok(parsed) = serde_json::from_str::(inner) { + if let Some(call) = json_value_to_tool_call(&parsed, call_id) { + calls.push(call); + } + continue; + } + + if let Some(call) = parse_function_tag_tool_call(inner, call_id) { + calls.push(call); + } + } + } + } + if !calls.is_empty() { + return calls; + } + } + + let processed = process_model_specific_message(text); + let processed = fix_broken_json(&processed); + + if let Ok(deser) = serde_json::from_str::(&processed) { + let args = serde_json::to_string(&deser.parameters).unwrap_or_else(|_| "{}".to_string()); + let call = new_tool_call( + format!("call_{}", call_id), + deser.name, + args, + ); + *call_id += 1; + return vec![call]; + } + + if let Ok(deser) = serde_json::from_str::>(&processed) { + let mut out = Vec::new(); + for item in deser { + let args = serde_json::to_string(&item.parameters).unwrap_or_else(|_| "{}".to_string()); + out.push(new_tool_call( + format!("call_{}", call_id), + item.name, + args, + )); + *call_id += 1; + } + return out; + } + + Vec::new() +} + +/// Checks if the given prefix could be the start of, or the entire JSON serialization of a tool call. +/// Returns (could_be_tool, is_complete_tool). +pub fn prefix_could_be_tool(prefix: &str) -> (bool, bool) { + if prefix.trim().is_empty() { + return (false, false); + } + + // If we already have a full ..., attempt to parse directly. + if prefix.contains("") { + let mut call_id = 0; + if !parse_tool_calls_from_text(prefix, &mut call_id).is_empty() { + return (false, true); + } + } + + // If we see a start tag, it's at least a potential tool call. 
+ if prefix.contains("") { + return (true, false); + } + + let processed = process_model_specific_message(prefix); + let processed = fix_broken_json(&processed); + + let checks = [ + could_be_json::, + could_be_json::>, + ]; + + for check in checks { + let (could_be, complete) = check(&processed); + if could_be || complete { + return (could_be, complete); + } + } + + ( + contains_tool_call_prefix(prefix) || contains_tool_call_prefix(&processed), + false, + ) +} + +fn could_be_json(text_prefix: &str) -> (bool, bool) +where + T: serde::de::DeserializeOwned, +{ + if text_prefix.trim().is_empty() { + return (false, false); + } + match serde_json::from_str::(text_prefix) { + Ok(_) => (false, true), + Err(e) if e.is_eof() => (true, false), + _ => (false, false), + } +} + +fn parse_function_tag_tool_call(inner: &str, call_id: &mut usize) -> Option { + let func_tag = "')? + name_start; + if name_end <= name_start { + return None; + } + let func_name = inner[name_start..name_end].trim(); + if func_name.is_empty() { + return None; + } + + let mut params = Map::new(); + let mut pos = name_end + 1; + while let Some(param_tag_pos) = inner[pos..].find("") + .map(|v| v + value_start)?; + if value_end <= value_start { + break; + } + let value_raw = inner[value_start..value_end].trim(); + let value = serde_json::from_str::(value_raw) + .unwrap_or_else(|_| Value::String(value_raw.to_string())); + params.insert(key.to_string(), value); + pos = value_end + "".len(); + } + + let args = Value::Object(params); + let args_str = serde_json::to_string(&args).ok()?; + + let call = new_tool_call( + format!("call_{}", call_id), + func_name.to_string(), + args_str, + ); + *call_id += 1; + Some(call) +} + #[cfg(test)] mod tests { use super::*; @@ -372,4 +670,25 @@ mod tests { assert!(parser.has_tool_calls(r#"{"name": "foo", "arguments": {}}"#)); assert!(!parser.has_tool_calls("Just a normal response")); } + + #[test] + fn test_parse_function_tag_format() { + let parser = ToolParser::new(); + let 
text = r#" + + +{"bar": 1} + + +qux + + +"#; + + let calls = parser.parse(text); + assert_eq!(calls.len(), 1); + assert_eq!(calls[0].function.name, "my_tool"); + assert!(calls[0].clone().function.arguments.unwrap().contains("\"foo\"")); + assert!(calls[0].clone().function.arguments.unwrap().contains("\"baz\"")); + } } diff --git a/src/tools/schema.rs b/src/tools/schema.rs index fa5ebce0..76e1a450 100644 --- a/src/tools/schema.rs +++ b/src/tools/schema.rs @@ -3,8 +3,314 @@ //! //! Provides helpers for working with JSON Schema in tool definitions. -use serde_json::{json, Value}; -use std::collections::HashMap; +use crate::tools::Tool; +use serde_json::{json, Map, Value}; +use std::collections::{HashMap, HashSet}; +use crate::utils::guidance::{TopLevelGrammarExt, GrammarError, GrammarResult}; +use llguidance::api::TopLevelGrammar; + +/// Remove JSON Schema features that llguidance doesn't support. +/// Currently strips all "format" fields recursively. +pub fn sanitize_schema_for_llguidance(schema: &Value) -> Value { + match schema { + Value::Object(map) => { + let mut out = Map::new(); + for (key, value) in map { + if key == "format" { + continue; + } + out.insert(key.clone(), sanitize_schema_for_llguidance(value)); + } + Value::Object(out) + } + Value::Array(items) => { + Value::Array(items.iter().map(sanitize_schema_for_llguidance).collect()) + } + _ => schema.clone(), + } +} + +/// Lark grammar helper functions for llguidance constraint building +/// Sanitize string for Lark grammar - only allow ASCII characters +fn lark_quote(value: &str) -> String { + // Strip non-ASCII characters to prevent grammar parser errors + let sanitized: String = value + .chars() + .filter(|c| c.is_ascii()) + .collect(); + let escaped = sanitized.replace('\\', "\\\\").replace('"', "\\\""); + format!("\"{}\"", escaped) +} + +/// Convert token IDs to Lark special token syntax <[token_id]> +/// This is used when the tokenizer has canonical tokenization for the tag +fn 
lark_special_token(token_ids: &HashSet) -> String { + if token_ids.is_empty() { + return String::new(); + } + // Join multiple token IDs with | + let ids: Vec = token_ids.iter().map(|id| format!("[{}]", id)).collect(); + format!("<{}>", ids.join(",")) +} + +fn _lark_literal(value: &str, is_special: bool) -> String { + if is_special && value.starts_with('<') && value.ends_with('>') { + // Only allow ASCII special tags + let sanitized: String = value + .chars() + .filter(|c| c.is_ascii()) + .collect(); + sanitized + } else { + lark_quote(value) + } +} + +/// Builder for constructing tool call grammars +pub struct ToolGrammarBuilder { + tools: Vec, + start_tag: String, + end_tag: String, + start_is_special: bool, + end_is_special: bool, + start_token_ids: Option>, + end_token_ids: Option>, +} + +impl ToolGrammarBuilder { + pub fn new() -> Self { + Self { + tools: Vec::new(), + start_tag: String::new(), + end_tag: String::new(), + start_is_special: false, + end_is_special: false, + start_token_ids: None, + end_token_ids: None, + } + } + + pub fn tools(mut self, tools: &[Tool]) -> Self { + self.tools.extend(tools.iter().cloned()); + self + } + + pub fn start_tag(mut self, tag: impl Into) -> Self { + self.start_tag = tag.into(); + self + } + + pub fn end_tag(mut self, tag: impl Into) -> Self { + self.end_tag = tag.into(); + self + } + + pub fn start_is_special(mut self, special: bool) -> Self { + self.start_is_special = special; + self + } + + pub fn end_is_special(mut self, special: bool) -> Self { + self.end_is_special = special; + self + } + + pub fn start_token_ids(mut self, ids: Option>) -> Self { + self.start_token_ids = ids; + self + } + + pub fn end_token_ids(mut self, ids: Option>) -> Self { + self.end_token_ids = ids; + self + } + + /// Build Lark expression for JSON tool schema content + pub fn build_json(self) -> TopLevelGrammar { + let mut rules = Vec::new(); + + let start_tag = self.get_tag_or_token_id(&self.start_tag, &self.start_token_ids, 
self.start_is_special); + let end_tag = self.get_tag_or_token_id(&self.end_tag, &self.end_token_ids, self.end_is_special); + + rules.push("start: tool_call".to_string()); + rules.push(format!("tool_call: {} tool_obj {}", start_tag, end_tag)); + rules.push("tool_obj: %json {\"type\":\"object\",\"properties\":{\"name\":{\"type\":\"string\"},\"arguments\":{\"type\":\"object\"}},\"required\":[\"name\",\"arguments\"]}".to_string()); + rules.push("json_array: \"[\" obj (\",\" obj)* \"]\"".to_string()); + + for tool in &self.tools { + let tool_name = tool.function.name.replace("-", "_"); + let schema_str = serde_json::to_string(&tool.function.parameters).unwrap_or_default(); + rules.push(format!("obj_{tool_name}: %json {schema_str}")); + } + + if rules.len() <= 4 { + rules.push("obj: %json {\"type\": \"object\"}".to_string()); + } else { + rules.extend(self.tools.iter().enumerate().map(|(_i, t)| { + let name = t.function.name.replace("-", "_"); + format!("obj_{name}: %json {}", serde_json::to_string(&t.function.parameters).unwrap_or_default()) + })); + + let obj_names = self.tools.iter().map(|t| { + format!("obj_{}", t.function.name.replace("-", "_")) + }).collect::>().join(" | "); + rules.push(format!("obj: {}", obj_names)); + } + + // rules.push(format!("ws: {}", lark_ws_regex())); + + let lark = rules.join("\n") + "\n"; + crate::log_debug!("[llg] ToolGrammarBuilder::build_json lark: {}", &lark); + TopLevelGrammar::from_lark_utf8(&lark) + } + + /// Build Lark expression for valid XML parameter content + fn build_xml_value_expression(schema: &serde_json::Value) -> String { + let param_type = schema.get("type").and_then(|t| t.as_str()).unwrap_or("string"); + + match param_type { + "string" => { + // Match any text content without look-around assertions + // Simple pattern: match any character except < or any < followed by non-slash + if let Ok(val) = std::env::var("VLLM_LLG_DEFAULT_XML_STR") { + format!("{}", val) + } else { + r#"/[^<]*/"#.to_string() + // 
r"/[^<]+(<[^/][^<]*)*/".to_string() + // ^^ nested tag capture produces infinite generation - limitation of XML + } + }, + "integer" => r"/-?[0-9]+/".to_string(), + "number" => r"/-?[0-9]+(\.[0-9]+)?/".to_string(), + "boolean" => r"/^(true|false)$/".to_string(), + "array" => r"/\[[^\]]*\]/".to_string(), + "object" => r"/\{[^\}]*\}/".to_string(), + _ => r"/(?s:.*)/".to_string(), + } + } + + /// Build Lark expression for XML tool schema content + pub fn build_xml(self) -> TopLevelGrammar { + let mut rules: Vec = Vec::new(); + + // Build envelope tag using token IDs when available + let envelope_start_tag = self.get_envelope_tag(&self.start_tag, &self.start_token_ids, self.start_is_special); + let envelope_end_tag = self.get_envelope_tag(&self.end_tag, &self.end_token_ids, self.end_is_special); + + let tool_rule_names: Vec = (0..self.tools.len()).map(|i| format!("tool_{i}")).collect(); + rules.push("start: tool_call".to_string()); + rules.push(format!("tool_call: {} tool_content {}", envelope_start_tag, envelope_end_tag)); + + // Get required params from schema + let get_required_params = |params_schema: &serde_json::Value| -> Vec { + params_schema.get("required") + .and_then(|r| r.as_array()) + .map(|arr| arr.iter().filter_map(|v| v.as_str().map(|s| s.to_string())).collect()) + .unwrap_or_default() + }; + + for (tool_idx, tool) in self.tools.iter().enumerate() { + let tool_name_ascii: String = tool.function.name.chars().filter(|c| c.is_ascii()).collect(); + let func_start = lark_quote(&format!("", tool_name_ascii)); + let func_end = lark_quote(""); + let params_schema = &tool.function.parameters; + let props = params_schema.get("properties").and_then(|p| p.as_object()); + let required_params = get_required_params(params_schema); + + if let Some(props) = props { + let mut param_rules_vec: Vec = Vec::new(); + + for (param_idx, (param_name, schema)) in props.iter().enumerate() { + let param_name_ascii: String = param_name.chars().filter(|c| c.is_ascii()).collect(); + 
let param_tag = lark_quote(&format!("", param_name_ascii)); + let param_end = lark_quote(""); + let value_rule = format!("value_{tool_idx}_{param_idx}"); + let param_rule = format!("param_{tool_idx}_{param_idx}"); + + // Determine the Lark expression for valid XML content based on schema type + let value_expr = Self::build_xml_value_expression(schema); + rules.push(format!("{value_rule}: {value_expr}")); + rules.push(format!("{param_rule}: {param_tag} {value_rule} {param_end}")); + + // Add to param_rules_vec with ? for optional, bare for required + if required_params.contains(param_name) { + param_rules_vec.push(param_rule.clone()); + } else { + param_rules_vec.push(format!("({param_rule})?")); + } + } + + let params_expr = param_rules_vec.join(" "); + rules.push(format!("tool_{tool_idx}: {func_start} {params_expr} {func_end}")); + } else { + // No parameters - just function tags + rules.push(format!("tool_{tool_idx}: {func_start} {func_end}")); + } + } + + // Build tool_content with alternation of all tools + let tool_variants = tool_rule_names.join(" | "); + rules.push(format!("tool_content: {tool_variants}")); + // rules.push(format!("_WS: {}", lark_ws_regex())); + + let lark = rules.join("\n") + "\n"; + crate::log_debug!("[llg] ToolGrammarBuilder::build_json lark: {}", &lark); + TopLevelGrammar::from_lark_utf8(&lark) + } + + /// Get envelope tag (start/end) using token IDs when available, falling back to string literals + fn get_envelope_tag(&self, tag: &str, token_ids: &Option>, is_special: bool) -> String { + if let Some(ids) = token_ids { + if !ids.is_empty() { + return lark_special_token(ids); + } + } + + if is_special && tag.starts_with('<') && tag.ends_with('>') { + // Only allow ASCII special tags + let sanitized: String = tag.chars().filter(|c| c.is_ascii()).collect(); + sanitized + } else { + lark_quote(tag) + } + } + + fn get_tag_or_token_id(&self, tag: &str, token_ids: &Option>, is_special: bool) -> String { + if let Some(ids) = token_ids { + if 
!ids.is_empty() { + return format!("<{}>", ids.iter().map(|id| format!("[{}]", id)).collect::>().join(",")); + } + } + + if is_special && tag.starts_with('<') && tag.ends_with('>') { + tag.to_string() + } else { + lark_quote(tag) + } + } +} + +/// Build a Lark grammar for QwenCoder-style function/parameter tags with JSON values. +/// Used for models like Qwen3-Coder that use XML-style tool call envelopes. +pub fn build_xml_tool_lark_grammar( + tools: &[Tool], + start: &str, + end: &str, + start_is_special: bool, + end_is_special: bool, + start_token_ids: Option<&HashSet>, + end_token_ids: Option<&HashSet>, +) -> TopLevelGrammar { + ToolGrammarBuilder::new() + .tools(tools) + .start_tag(start) + .end_tag(end) + .start_is_special(start_is_special) + .end_is_special(end_is_special) + .start_token_ids(start_token_ids.cloned()) + .end_token_ids(end_token_ids.cloned()) + .build_xml() +} /// Builder for creating JSON Schema objects #[derive(Debug, Clone, Default)] @@ -251,3 +557,1005 @@ pub mod common { .build() } } + +/// Build a Lark grammar for choice constraints (structured outputs choice field) +pub fn build_choice_lark_grammar(choices: &[String]) -> GrammarResult { + if choices.is_empty() { + return Err(GrammarError::InvalidGrammar("structured_outputs.choice must include at least one option".to_string())); + } + + let mut parts = Vec::with_capacity(choices.len()); + for choice in choices { + if choice.is_empty() { + return Err(GrammarError::InvalidGrammar("structured_outputs.choice cannot contain empty strings".to_string())); + } + parts.push(lark_quote(choice)); + } + + let body = parts.join(" | "); + let lark_string = format!("start: {}\n", body); + Ok(TopLevelGrammar::from_lark_utf8(&lark_string)) +} + +/// Normalize a tag string for structural_tag parsing +fn normalize_tag_pair(tag: &str) -> Result<(String, String), String> { + let trimmed = tag.trim(); + if trimmed.is_empty() { + return Err("structured_outputs.structural_tag.tag cannot be empty".to_string()); + 
} + + if trimmed.starts_with('<') && trimmed.ends_with('>') { + let inner = trimmed + .trim_start_matches('<') + .trim_end_matches('>') + .trim_start_matches('/'); + if inner.is_empty() { + return Err("structured_outputs.structural_tag.tag is invalid".to_string()); + } + let start = if trimmed.starts_with("", inner) + } else { + trimmed.to_string() + }; + let end = format!("", inner); + Ok((start, end)) + } else { + Ok((format!("<{}>", trimmed), format!("", trimmed))) + } +} + +/// Parse structural_tag for structured outputs +pub fn parse_structural_tag(value: &Value) -> Result<(String, String, Value), String> { + let obj = value.as_object().ok_or_else(|| { + "structured_outputs.structural_tag must be an object".to_string() + })?; + + let schema = obj.get("schema").cloned().ok_or_else(|| { + "structured_outputs.structural_tag.schema is required".to_string() + })?; + + let start = obj.get("start_tag").or_else(|| obj.get("start")).or_else(|| obj.get("tag")); + let end = obj.get("end_tag").or_else(|| obj.get("end")); + + let (start_tag, end_tag) = match (start, end) { + (Some(start_val), Some(end_val)) => { + let start = start_val.as_str().ok_or_else(|| { + "structured_outputs.structural_tag.start_tag must be a string".to_string() + })?; + let end = end_val.as_str().ok_or_else(|| { + "structured_outputs.structural_tag.end_tag must be a string".to_string() + })?; + (start.to_string(), end.to_string()) + } + (Some(tag), None) if obj.contains_key("tag") => normalize_tag_pair(tag.as_str().ok_or_else(|| "structured_outputs.structural_tag.tag must be a string".to_string())?)?, + _ => { + return Err("structured_outputs.structural_tag requires tag or start_tag/end_tag".to_string()); + } + }; + + Ok((start_tag, end_tag, schema)) +} + +/// Convert a Value schema to a Vec of Tool objects using ToolBuilder +/// The schema should be an object where keys are tool names and values are tool schemas +pub fn schema_to_tools(schema: &Value) -> Vec { + let mut tools = Vec::new(); + if 
let Value::Object(obj) = schema { + for (name, tool_schema) in obj { + if let Value::Object(props) = tool_schema { + if let Some(params) = props.get("parameters") { + let builder = crate::tools::ToolBuilder::new(name.clone(), "".to_string()) + .parameters_schema(params.clone()); + tools.push(builder.build()); + } + } + } + } + tools +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::utils::guidance::get_lark_from_top_level_grammar; + + #[test] + fn test_sanitize_schema_for_llguidance_strips_format() { + let schema = json!({ + "type": "object", + "properties": { + "url": {"type": "string", "format": "uri"}, + "nested": {"type": "object", "properties": {"id": {"type": "string", "format": "uuid"}}} + } + }); + let sanitized = sanitize_schema_for_llguidance(&schema); + assert!(sanitized["properties"]["url"].get("format").is_none()); + assert!(sanitized["properties"]["nested"]["properties"]["id"].get("format").is_none()); + } + + #[test] + fn test_build_choice_lark_grammar_empty_string() { + let result = build_choice_lark_grammar(&["".to_string()]); + assert!(result.is_err()); + } + + #[test] + fn test_parse_structural_tag_missing_schema() { + let value = json!({}); + let result = parse_structural_tag(&value); + assert!(result.is_err()); + } + + #[test] + fn test_parse_structural_tag_start_end() { + let value = json!({ + "start_tag": "", + "end_tag": "", + "schema": {"type": "object"} + }); + let result = parse_structural_tag(&value); + assert!(result.is_ok()); + let (start, end, schema) = result.unwrap(); + assert_eq!(start, ""); + assert_eq!(end, ""); + assert_eq!(schema, json!({"type": "object"})); + } + + #[test] + fn test_parse_structural_tag_tag() { + let value = json!({ + "tag": "", + "schema": {"type": "object"} + }); + let result = parse_structural_tag(&value); + assert!(result.is_ok()); + let (start, end, _) = result.unwrap(); + assert_eq!(start, ""); + assert_eq!(end, ""); + } + + #[test] + fn test_parse_structural_tag_invalid() { + let value = 
json!({ + "schema": {"type": "object"} + }); + let result = parse_structural_tag(&value); + assert!(result.is_err()); + } + + #[test] + fn test_lark_quote_escapes_special_chars() { + let result = lark_quote("test\"value"); + assert!(result.contains("test\\\"value")); + } + + #[test] + fn test_lark_literal_special_tags() { + let result = _lark_literal("", true); + assert_eq!(result, ""); + } + + #[test] + fn test_lark_literal_regular_string() { + let result = _lark_literal("regular", false); + assert!(result.contains("\"regular\"")); + } + + #[test] + fn test_lark_special_token_single_id() { + let mut ids = HashSet::new(); + ids.insert(151657); + let result = lark_special_token(&ids); + assert_eq!(result, "<[151657]>"); + } + + #[test] + fn test_lark_special_token_multiple_ids() { + let mut ids = HashSet::new(); + ids.insert(151657); + ids.insert(151658); + let result = lark_special_token(&ids); + assert!(result.contains("[151657]")); + assert!(result.contains("[151658]")); + } + + #[test] + fn test_lark_special_token_empty() { + let ids = HashSet::new(); + let result = lark_special_token(&ids); + assert_eq!(result, ""); + } + + #[test] + fn test_build_xml_tool_lark_grammar_qwen3_coder_required_only() { + // Test Qwen3-Coder XML tool format with required attributes only + let tools = vec![ + crate::tools::ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query", "string", "Search query", true) + .build(), + ]; + let grammar = build_xml_tool_lark_grammar(&tools, "", "", false, false, None, None); + let lark_str = get_lark_from_top_level_grammar(&grammar); + println!("{}", &lark_str); + + // Qwen3Coder uses XML format with start: tool_call + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + assert!(lark_str.contains(""), "Should contain function tag"); + assert!(lark_str.contains("tool_0:"), "Should contain tool_0 rule"); + } + + #[test] + fn test_build_xml_tool_lark_grammar_qwen3_coder_optional() { + // 
Test Qwen3-Coder XML tool format with optional attributes + let tools = vec![ + crate::tools::ToolBuilder::new("get_weather".to_string(), "Get weather".to_string()) + .param("city", "string", "City name", true) + .param("units", "string", "Temperature units (optional)", false) + .build(), + ]; + let grammar = build_xml_tool_lark_grammar(&tools, "", "", false, false, None, None); + let lark_str = get_lark_from_top_level_grammar(&grammar); + + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + assert!(lark_str.contains(""), "Should contain function tag"); + assert!(lark_str.contains("city"), "Should contain city parameter"); + assert!(lark_str.contains("units"), "Should contain optional units parameter"); + } + + #[test] + fn test_build_xml_tool_lark_grammar_qwen3_coder_deep_parameters() { + // Test Qwen3-Coder XML tool format with nested/complex parameters + let tools = vec![ + crate::tools::ToolBuilder::new("edit_file".to_string(), "Edit a file with complex parameters".to_string()) + .param("file_path", "string", "Path to the file", true) + .param("old_string", "string", "String to replace", true) + .param("new_string", "string", "Replacement string", true) + .param("replace_all", "boolean", "Replace all occurrences", false) + .build(), + ]; + let grammar = build_xml_tool_lark_grammar(&tools, "", "", false, false, None, None); + let lark_str = get_lark_from_top_level_grammar(&grammar); + println!("XML Grammar:\n{}", &lark_str); + + // Verify the grammar contains XML structure + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + // Note: uses U+200C (zero-width non-joiner) which is invisible + assert!(lark_str.contains("function="), "Should contain function tag with attribute"); + + // Verify all parameter tags are present + // Note: uses U+200C (zero-width non-joiner) which is invisible + assert!(lark_str.contains("parameter=file_path"), "Should contain file_path parameter tag"); + 
assert!(lark_str.contains("parameter=old_string"), "Should contain old_string parameter tag"); + assert!(lark_str.contains("parameter=new_string"), "Should contain new_string parameter tag"); + assert!(lark_str.contains("parameter=replace_all"), "Should contain replace_all parameter tag"); + + // Verify parameter rules reference the correct types + assert!(lark_str.contains("param_0_0:"), "Should have param_0_0 rule for first param"); + assert!(lark_str.contains("param_0_1:"), "Should have param_0_1 rule for second param"); + assert!(lark_str.contains("param_0_2:"), "Should have param_0_2 rule for third param"); + assert!(lark_str.contains("param_0_3:"), "Should have param_0_3 rule for fourth param"); + + // Verify tool rule has all parameters + assert!(lark_str.contains("tool_0:"), "Should have tool_0 rule"); + } + + #[test] + fn test_xml_grammar_required_params_no_wrapper() { + // Test that XML grammar puts required params directly without (...) * wrapper + let tools = vec![crate::tools::ToolBuilder::new("search_tool".to_string(), "Search tool".to_string()) + .param("query", "string", "Search query", true) // REQUIRED - should appear as bare rule reference + .build()]; + + let grammar = build_xml_tool_lark_grammar(&tools, "", "", false, false, None, None); + let lark_str = get_lark_from_top_level_grammar(&grammar); + + // Required param rule should appear directly in tool_0 (no parentheses/asterisk around it) + assert!(lark_str.contains("tool_0:"), "Should have tool_0 rule"); + assert!(lark_str.contains("param_0"), "Should have parameter rules"); + + // The required param should NOT be wrapped in (...) * pattern + // Look for the pattern where required params appear as direct references: "param_X Y" not "(param_X | ...)*" + } + + #[test] + fn test_xml_grammar_optional_params_wrapped() { + // Test that XML grammar wraps optional params with (...) 
* syntax + let tools = vec![crate::tools::ToolBuilder::new("mixed_tool".to_string(), "Mixed params".to_string()) + .param("required_param", "string", "Required", true) // REQUIRED + .param("optional_param", "string", "Optional", false) // OPTIONAL + .build()]; + + let grammar = build_xml_tool_lark_grammar(&tools, "", "", false, false, None, None); + let lark_str = get_lark_from_top_level_grammar(&grammar); + + println!("XML Grammar for mixed tool:\n{}", lark_str); + + // Optional parameters should appear in a (...) * pattern when there are multiple options + assert!(lark_str.contains("tool_0:"), "Should have tool_0 rule"); + } + + #[test] + fn test_xml_tool_call_structure_validates() { + // Full end-to-end: verify XML grammar produces valid llguidance TopLevelGrammar structure + let tools = vec![crate::tools::ToolBuilder::new("formatter".to_string(), "Formatter".to_string()) + .param("text", "string", "Text to format", true) + .build()]; + + let grammar = build_xml_tool_lark_grammar(&tools, "", "", false, false, None, None); + + // Grammar should have at least one sub-grammar (the tool rules) + assert!(grammar.grammars.len() > 0, "Should have generated grammars"); + } + + // === ToolGrammarBuilder JSON Mode Tests === + + #[test] + fn test_tool_grammar_builder_build_json_single_tool() { + // Test ToolGrammarBuilder.build_json() with a single tool + let tools = vec![ + crate::tools::ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query", "string", "Search query", true) + .build(), + ]; + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("") + .end_tag("") + .start_is_special(false) + .end_is_special(false) + .build_json(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + + // Verify basic structure + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + assert!(lark_str.contains("obj_search:"), "Should contain obj_search rule"); + assert!(lark_str.contains("query"), "Should 
contain query parameter"); + } + + #[test] + fn test_tool_grammar_builder_build_json_multiple_tools() { + // Test ToolGrammarBuilder.build_json() with multiple tools + let tools = vec![ + crate::tools::ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query", "string", "Search query", true) + .build(), + crate::tools::ToolBuilder::new("weather".to_string(), "Get weather".to_string()) + .param("city", "string", "City name", true) + .build(), + ]; + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("") + .end_tag("") + .start_is_special(false) + .end_is_special(false) + .build_json(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + + // Verify all tools are present + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + assert!(lark_str.contains("obj_search:"), "Should contain obj_search rule"); + assert!(lark_str.contains("obj_weather:"), "Should contain obj_weather rule"); + // Verify obj alternation includes both tools + assert!(lark_str.contains("obj: obj_search | obj_weather"), "Should have obj alternation"); + } + + #[test] + fn test_tool_grammar_builder_build_json_with_token_ids() { + // Test ToolGrammarBuilder.build_json() with token IDs + let tools = vec![ + crate::tools::ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query", "string", "Search query", true) + .build(), + ]; + let mut start_ids = HashSet::new(); + start_ids.insert(151657); + let mut end_ids = HashSet::new(); + end_ids.insert(151658); + + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("") + .end_tag("") + .start_is_special(false) + .end_is_special(false) + .start_token_ids(Some(start_ids)) + .end_token_ids(Some(end_ids)) + .build_json(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + + // Verify token IDs are used + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + 
assert!(lark_str.contains("<[151657]>"), "Should contain start token ID"); + assert!(lark_str.contains("<[151658]>"), "Should contain end token ID"); + } + + #[test] + fn test_tool_grammar_builder_build_json_with_special_tags() { + // Test ToolGrammarBuilder.build_json() with special tags + let tools = vec![ + crate::tools::ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query", "string", "Search query", true) + .build(), + ]; + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("") + .end_tag("") + .start_is_special(true) + .end_is_special(true) + .build_json(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + + // Verify special tags are used as-is + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + assert!(lark_str.contains(""), "Should contain special start tag"); + assert!(lark_str.contains(""), "Should contain special end tag"); + } + + #[test] + fn test_tool_grammar_builder_build_json_required_optional() { + // Test ToolGrammarBuilder.build_json() with mix of required/optional params + let tools = vec![ + crate::tools::ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query", "string", "Search query", true) + .param("max_results", "integer", "Max results", false) + .build(), + ]; + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("") + .end_tag("") + .start_is_special(false) + .end_is_special(false) + .build_json(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + + // Verify both params are in schema, and required array is correct + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + assert!(lark_str.contains("obj_search:"), "Should contain obj_search rule"); + assert!(lark_str.contains("query"), "Should contain query parameter"); + assert!(lark_str.contains("max_results"), "Should contain max_results parameter"); + assert!(lark_str.contains("\"required\""), "Should 
have required array"); + } + + // === ToolGrammarBuilder XML Mode Tests === + + #[test] + fn test_tool_grammar_builder_build_xml_single_tool() { + // Test ToolGrammarBuilder.build_xml() with a single tool + let tools = vec![ + crate::tools::ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query", "string", "Search query", true) + .build(), + ]; + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("") + .end_tag("") + .start_is_special(false) + .end_is_special(false) + .build_xml(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + + // Verify XML structure + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + assert!(lark_str.contains("tool_call:"), "Should have tool_call rule"); + assert!(lark_str.contains("function=search"), "Should contain function tag"); + assert!(lark_str.contains("parameter=query"), "Should contain parameter tag"); + assert!(lark_str.contains("param_0_0:"), "Should have param_0_0 rule"); + } + + #[test] + fn test_tool_grammar_builder_build_xml_multiple_tools() { + // Test ToolGrammarBuilder.build_xml() with multiple tools + let tools = vec![ + crate::tools::ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query", "string", "Search query", true) + .build(), + crate::tools::ToolBuilder::new("weather".to_string(), "Get weather".to_string()) + .param("city", "string", "City name", true) + .build(), + ]; + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("") + .end_tag("") + .start_is_special(false) + .end_is_special(false) + .build_xml(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + + // Verify all tools are present + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + assert!(lark_str.contains("tool_0:"), "Should contain tool_0 rule"); + assert!(lark_str.contains("tool_1:"), "Should contain tool_1 rule"); + assert!(lark_str.contains("tool_content:"), 
"Should have tool_content rule"); + // Verify tool_content has alternation + assert!(lark_str.contains("tool_content: tool_0 | tool_1"), "Should have tool alternation"); + } + + #[test] + fn test_tool_grammar_builder_build_xml_with_token_ids() { + // Test ToolGrammarBuilder.build_xml() with token IDs + let tools = vec![ + crate::tools::ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query", "string", "Search query", true) + .build(), + ]; + let mut start_ids = HashSet::new(); + start_ids.insert(151657); + let mut end_ids = HashSet::new(); + end_ids.insert(151658); + + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("") + .end_tag("") + .start_is_special(false) + .end_is_special(false) + .start_token_ids(Some(start_ids)) + .end_token_ids(Some(end_ids)) + .build_xml(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + + // Verify token IDs are used for envelope tags + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + assert!(lark_str.contains("<[151657]>"), "Should contain start token ID"); + assert!(lark_str.contains("<[151658]>"), "Should contain end token ID"); + } + + #[test] + fn test_tool_grammar_builder_build_xml_with_special_tags() { + // Test ToolGrammarBuilder.build_xml() with special tags + let tools = vec![ + crate::tools::ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query", "string", "Search query", true) + .build(), + ]; + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("") + .end_tag("") + .start_is_special(true) + .end_is_special(true) + .build_xml(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + + // Verify special tags are used as-is + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + assert!(lark_str.contains(""), "Should contain special start tag"); + assert!(lark_str.contains(""), "Should contain special end tag"); + } + + #[test] + fn 
test_tool_grammar_builder_build_xml_required_optional() { + // Test ToolGrammarBuilder.build_xml() with mix of required/optional params + let tools = vec![ + crate::tools::ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query", "string", "Search query", true) + .param("max_results", "integer", "Max results", false) + .build(), + ]; + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("") + .end_tag("") + .start_is_special(false) + .end_is_special(false) + .build_xml(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + + // Verify both params are present + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + assert!(lark_str.contains("param_0_0:"), "Should have param_0_0 rule (query - required)"); + assert!(lark_str.contains("param_0_1:"), "Should have param_0_1 rule (max_results - optional)"); + assert!(lark_str.contains("parameter=query"), "Should contain query parameter tag"); + assert!(lark_str.contains("parameter=max_results"), "Should contain max_results parameter tag"); + } + + #[test] + fn test_tool_grammar_builder_build_xml_no_parameters() { + // Test ToolGrammarBuilder.build_xml() with tool that has no parameters + let tools = vec![ + crate::tools::ToolBuilder::new("hello".to_string(), "Say hello".to_string()) + .param("query", "string", "Search query", true) + .parameters_schema(serde_json::json!({ + "type": "object", + "properties": {}, + "required": [] + })) + .build(), + ]; + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("") + .end_tag("") + .start_is_special(false) + .end_is_special(false) + .build_xml(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + + // Verify tool with no parameters still generates valid grammar + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + assert!(lark_str.contains("function=hello"), "Should contain function tag"); + } + + #[test] + fn 
test_tool_grammar_builder_build_json_no_parameters() { + // Test ToolGrammarBuilder.build_json() with tool that has no parameters + let tools = vec![ + crate::tools::ToolBuilder::new("hello".to_string(), "Say hello".to_string()) + .parameters_schema(serde_json::json!({ + "type": "object", + "properties": {}, + "required": [] + })) + .build(), + ]; + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("") + .end_tag("") + .start_is_special(false) + .end_is_special(false) + .build_json(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + + // Verify tool with no parameters still generates valid grammar + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + assert!(lark_str.contains("obj_hello:"), "Should contain obj_hello rule"); + } + + #[test] + fn test_tool_grammar_builder_build_json_empty_tools() { + // Test ToolGrammarBuilder.build_json() with empty tools list + let grammar = ToolGrammarBuilder::new() + .tools(&[]) + .start_tag("") + .end_tag("") + .start_is_special(false) + .end_is_special(false) + .build_json(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + + // Verify grammar is still valid with no tools + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + // With no tools, obj should be a generic object + assert!(lark_str.contains("obj: %json"), "Should have obj rule with generic schema"); + } + + #[test] + fn test_tool_grammar_builder_build_xml_empty_tools() { + // Test ToolGrammarBuilder.build_xml() with empty tools list + let grammar = ToolGrammarBuilder::new() + .tools(&[]) + .start_tag("") + .end_tag("") + .start_is_special(false) + .end_is_special(false) + .build_xml(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + + // Verify grammar is still valid with no tools + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + assert!(lark_str.contains("tool_content:"), "Should have tool_content rule"); + } 
+ + #[test] + fn test_tool_grammar_builder_build_json_structure_validates() { + // Full end-to-end: verify JSON grammar produces valid llguidance TopLevelGrammar structure + let tools = vec![crate::tools::ToolBuilder::new("calculator".to_string(), "Calculator".to_string()) + .param("expression", "string", "Math expression", true) + .build()]; + + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("") + .end_tag("") + .start_is_special(false) + .end_is_special(false) + .build_json(); + + // Grammar should have at least one sub-grammar + assert!(grammar.grammars.len() > 0, "Should have generated grammars"); + } + + #[test] + fn test_tool_grammar_builder_build_xml_structure_validates() { + // Full end-to-end: verify XML grammar produces valid llguidance TopLevelGrammar structure + let tools = vec![crate::tools::ToolBuilder::new("formatter".to_string(), "Formatter".to_string()) + .param("text", "string", "Text to format", true) + .build()]; + + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("") + .end_tag("") + .start_is_special(false) + .end_is_special(false) + .build_xml(); + + // Grammar should have at least one sub-grammar + assert!(grammar.grammars.len() > 0, "Should have generated grammars"); + } + + // === Comprehensive ToolGrammarBuilder Tests === + + #[test] + fn test_tool_grammar_builder_build_xml_complex_full_schema() { + // Test ToolGrammarBuilder.build_xml() with complex nested schema + // and model-specific envelope tags with token IDs + let tools = vec![ + crate::tools::ToolBuilder::new("edit_file".to_string(), "Edit a file".to_string()) + .param("file_path", "string", "Path to the file", true) + .param("old_string", "string", "String to replace", true) + .param("new_string", "string", "Replacement string", true) + .param("max_replacements", "integer", "Maximum replacements", false) + .param("context", "object", "Context object", false) + .param("tags", "array", "Optional tags array", false) + .build(), + ]; + + 
// Build XML grammar with token IDs for envelope tags + let mut start_ids = HashSet::new(); + start_ids.insert(151657); + let mut end_ids = HashSet::new(); + end_ids.insert(151658); + + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("") + .end_tag("") + .start_is_special(false) + .end_is_special(false) + .start_token_ids(Some(start_ids)) + .end_token_ids(Some(end_ids)) + .build_xml(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + // println!("{}", &lark_str); + + // Validate envelope layer (token IDs) + assert!(lark_str.contains("<[151657]>"), "Should have start token ID envelope"); + assert!(lark_str.contains("<[151658]>"), "Should have end token ID envelope"); + + // Validate tool_call structure + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + assert!(lark_str.contains("tool_call:"), "Should have tool_call rule"); + + // Validate tool_content alternation + assert!(lark_str.contains("tool_content: tool_0"), "Should have tool_content with tool_0"); + + // Validate function tag layer + assert!(lark_str.contains("function=edit_file"), "Should have function tag"); + assert!(lark_str.contains("function="), "Should have function tag pattern"); + + // Validate parameter tags and rules + assert!(lark_str.contains("parameter=file_path"), "Should have file_path parameter tag"); + assert!(lark_str.contains("parameter=old_string"), "Should have old_string parameter tag"); + assert!(lark_str.contains("parameter=new_string"), "Should have new_string parameter tag"); + assert!(lark_str.contains("parameter=max_replacements"), "Should have max_replacements parameter tag"); + assert!(lark_str.contains("parameter=context"), "Should have context parameter tag"); + assert!(lark_str.contains("parameter=tags"), "Should have tags parameter tag"); + + // Validate param rules with correct types + assert!(lark_str.contains("param_0_0:"), "Should have param_0_0 rule (file_path - required)"); + 
assert!(lark_str.contains("param_0_1:"), "Should have param_0_1 rule (old_string - required)"); + assert!(lark_str.contains("param_0_2:"), "Should have param_0_2 rule (new_string - required)"); + assert!(lark_str.contains("param_0_3:"), "Should have param_0_3 rule (max_replacements - optional)"); + assert!(lark_str.contains("param_0_4:"), "Should have param_0_4 rule (context - optional)"); + assert!(lark_str.contains("param_0_5:"), "Should have param_0_5 rule (tags - optional)"); + + // Validate value rules with regex patterns for each type + assert!(lark_str.contains("value_0_0:"), "Should have value_0_0 rule for file_path"); + assert!(lark_str.contains("value_0_1:"), "Should have value_0_1 rule for old_string"); + assert!(lark_str.contains("value_0_2:"), "Should have value_0_2 rule for new_string"); + assert!(lark_str.contains("value_0_3:"), "Should have value_0_3 rule for max_replacements"); + assert!(lark_str.contains("value_0_4:"), "Should have value_0_4 rule for context"); + assert!(lark_str.contains("value_0_5:"), "Should have value_0_5 rule for tags"); + + // Validate required params are bare (no ? wrapper) + assert!(lark_str.contains("param_0_0 "), "file_path should be bare (required)"); + assert!(lark_str.contains("param_0_1 "), "old_string should be bare (required)"); + assert!(lark_str.contains("param_0_2 "), "new_string should be bare (required)"); + + // Validate optional params have ? 
wrapper + assert!(lark_str.contains("(param_0_3)?"), "max_replacements should be optional"); + assert!(lark_str.contains("(param_0_4)?"), "context should be optional"); + assert!(lark_str.contains("(param_0_5)?"), "tags should be optional"); + + // Validate tool rule structure + assert!(lark_str.contains("tool_0:"), "Should have tool_0 rule"); + assert!(lark_str.contains("tool_0: \"\""), "Should have tool_0 with function tags"); + } + + #[test] + fn test_tool_grammar_builder_build_json_complex_full_schema() { + // Test ToolGrammarBuilder.build_json() with complex nested schema + // and model-specific envelope tags with token IDs + let tools = vec![ + crate::tools::ToolBuilder::new("edit_file".to_string(), "Edit a file".to_string()) + .param("file_path", "string", "Path to the file", true) + .param("old_string", "string", "String to replace", true) + .param("new_string", "string", "Replacement string", true) + .param("max_replacements", "integer", "Maximum replacements", false) + .param("context", "object", "Context object", false) + .param("tags", "array", "Optional tags array", false) + .build(), + ]; + + // Build JSON grammar with token IDs for envelope tags + let mut start_ids = HashSet::new(); + start_ids.insert(151657); + let mut end_ids = HashSet::new(); + end_ids.insert(151658); + + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("") + .end_tag("") + .start_is_special(false) + .end_is_special(false) + .start_token_ids(Some(start_ids)) + .end_token_ids(Some(end_ids)) + .build_json(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + + // Validate envelope layer (token IDs) + assert!(lark_str.contains("<[151657]>"), "Should have start token ID envelope"); + assert!(lark_str.contains("<[151658]>"), "Should have end token ID envelope"); + + // Validate tool_call structure + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + assert!(lark_str.contains("tool_call:"), "Should have tool_call rule"); + 
+ // Validate tool_obj structure with name and arguments + assert!(lark_str.contains("tool_obj:"), "Should have tool_obj rule"); + assert!(lark_str.contains("\"name\""), "Should have name in tool_obj"); + assert!(lark_str.contains("\"arguments\""), "Should have arguments in tool_obj"); + + // Validate obj rule references the tool + assert!(lark_str.contains("obj_edit_file:"), "Should have obj_edit_file rule"); + assert!(lark_str.contains("obj: obj_edit_file"), "Should have obj alternation"); + + // Validate JSON schema contains all parameters + assert!(lark_str.contains("file_path"), "Should contain file_path in schema"); + assert!(lark_str.contains("old_string"), "Should contain old_string in schema"); + assert!(lark_str.contains("new_string"), "Should contain new_string in schema"); + assert!(lark_str.contains("max_replacements"), "Should contain max_replacements in schema"); + assert!(lark_str.contains("context"), "Should contain context in schema"); + assert!(lark_str.contains("tags"), "Should contain tags in schema"); + + // Validate required parameters in JSON schema + assert!(lark_str.contains("\"required\""), "Should have required array"); + assert!(lark_str.contains("file_path"), "Should have file_path in required"); + assert!(lark_str.contains("old_string"), "Should have old_string in required"); + assert!(lark_str.contains("new_string"), "Should have new_string in required"); + } + + #[test] + fn test_tool_grammar_builder_build_xml_multiple_tools_full_validation() { + // Test ToolGrammarBuilder.build_xml() with multiple tools and full validation + let tools = vec![ + crate::tools::ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query", "string", "Search query", true) + .param("max_results", "integer", "Max results", false) + .build(), + crate::tools::ToolBuilder::new("weather".to_string(), "Get weather".to_string()) + .param("city", "string", "City name", true) + .param("units", "string", "Units", false) + .build(), + ]; + 
+ // Build XML grammar with token IDs for envelope tags + let mut start_ids = HashSet::new(); + start_ids.insert(151657); + let mut end_ids = HashSet::new(); + end_ids.insert(151658); + + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("") + .end_tag("") + .start_is_special(false) + .end_is_special(false) + .start_token_ids(Some(start_ids)) + .end_token_ids(Some(end_ids)) + .build_xml(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + + // Validate envelope layer + assert!(lark_str.contains("<[151657]>"), "Should have start token ID envelope"); + assert!(lark_str.contains("<[151658]>"), "Should have end token ID envelope"); + + // Validate tool_content alternation with both tools + assert!(lark_str.contains("tool_content: tool_0 | tool_1"), "Should have tool alternation"); + + // Validate tool_0 (search) structure + assert!(lark_str.contains("tool_0:"), "Should have tool_0 rule"); + assert!(lark_str.contains("function=search"), "Should have search function tag"); + assert!(lark_str.contains("parameter=query"), "Should have query parameter"); + assert!(lark_str.contains("parameter=max_results"), "Should have max_results parameter"); + assert!(lark_str.contains("param_0_0:"), "Should have param_0_0 (query - required)"); + assert!(lark_str.contains("param_0_1:"), "Should have param_0_1 (max_results - optional)"); + + // Validate tool_1 (weather) structure + assert!(lark_str.contains("tool_1:"), "Should have tool_1 rule"); + assert!(lark_str.contains("function=weather"), "Should have weather function tag"); + assert!(lark_str.contains("parameter=city"), "Should have city parameter"); + assert!(lark_str.contains("parameter=units"), "Should have units parameter"); + assert!(lark_str.contains("param_1_0:"), "Should have param_1_0 (city - required)"); + assert!(lark_str.contains("param_1_1:"), "Should have param_1_1 (units - optional)"); + } + + #[test] + fn test_tool_grammar_builder_build_json_multiple_tools_full_validation() { + // 
Test ToolGrammarBuilder.build_json() with multiple tools and full validation + let tools = vec![ + crate::tools::ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query", "string", "Search query", true) + .param("max_results", "integer", "Max results", false) + .build(), + crate::tools::ToolBuilder::new("weather".to_string(), "Get weather".to_string()) + .param("city", "string", "City name", true) + .param("units", "string", "Units", false) + .build(), + ]; + + // Build JSON grammar with token IDs for envelope tags + let mut start_ids = HashSet::new(); + start_ids.insert(151657); + let mut end_ids = HashSet::new(); + end_ids.insert(151658); + + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("") + .end_tag("") + .start_is_special(false) + .end_is_special(false) + .start_token_ids(Some(start_ids)) + .end_token_ids(Some(end_ids)) + .build_json(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + + // Validate envelope layer + assert!(lark_str.contains("<[151657]>"), "Should have start token ID envelope"); + assert!(lark_str.contains("<[151658]>"), "Should have end token ID envelope"); + + // Validate obj alternation with both tools + assert!(lark_str.contains("obj: obj_search | obj_weather"), "Should have obj alternation"); + + // Validate obj_search structure + assert!(lark_str.contains("obj_search:"), "Should have obj_search rule"); + assert!(lark_str.contains("query"), "Should have query in obj_search"); + assert!(lark_str.contains("max_results"), "Should have max_results in obj_search"); + + // Validate obj_weather structure + assert!(lark_str.contains("obj_weather:"), "Should have obj_weather rule"); + assert!(lark_str.contains("city"), "Should have city in obj_weather"); + assert!(lark_str.contains("units"), "Should have units in obj_weather"); + + // Validate required parameters in both schemas + assert!(lark_str.contains("\"required\":[\"query\"]"), "Should have query in required for search"); + 
assert!(lark_str.contains("\"required\":[\"city\"]"), "Should have city in required for weather"); + } +} diff --git a/src/transfer/comm.rs b/src/transfer/comm.rs index c2c83b39..dbc534ae 100644 --- a/src/transfer/comm.rs +++ b/src/transfer/comm.rs @@ -1,6 +1,6 @@ // src/core/transfer/comm.rs use super::{FinishedPrefillData, PdConfig, PdRole, TransferMessage}; -use bincode; +use rmp_serde; use candle_core::Result; use interprocess::local_socket::traits::Listener; use interprocess::local_socket::traits::Stream; @@ -382,9 +382,9 @@ impl Communicator { } /// Generic, standardized function to send a message. -/// Uses a 4-byte LE length prefix followed by bincode data. +/// Uses a 4-byte LE length prefix followed by rmp data. fn send_message_generic(stream: &mut (impl Read + Write), msg: &TransferMessage) -> Result { - let serialized: Vec = bincode::serialize(msg).map_err(candle_core::Error::wrap)?; + let serialized: Vec = rmp_serde::to_vec(msg).map_err(candle_core::Error::wrap)?; let len = serialized.len() as u32; stream.write_all(&len.to_le_bytes())?; stream.write_all(&serialized)?; @@ -393,7 +393,7 @@ fn send_message_generic(stream: &mut (impl Read + Write), msg: &TransferMessage) } /// Generic, standardized function to receive a message. -/// Reads a 4-byte LE length prefix then bincode data. +/// Reads a 4-byte LE length prefix then rmp data. 
fn receive_message_generic(stream: &mut (impl Read + Write)) -> Result { let mut len_buf = [0u8; 4]; stream.read_exact(&mut len_buf)?; @@ -407,6 +407,6 @@ fn receive_message_generic(stream: &mut (impl Read + Write)) -> Result bool { + let Some(template) = &self.chat_template else { + return false; + }; + let lower = template.to_lowercase(); + lower.contains("tools") + || lower.contains("tool_calls") + || lower.contains("[available_tools]") + || lower.contains("") + } + #[allow(dead_code)] fn clear_message(&mut self) { self.messages.clear() diff --git a/src/utils/command.rs b/src/utils/command.rs index b8cbc74f..1951eadf 100644 --- a/src/utils/command.rs +++ b/src/utils/command.rs @@ -1,5 +1,5 @@ use crate::runner::MessageType; -use bincode; +use rmp_serde; use interprocess::local_socket::traits::{Listener, Stream}; use interprocess::local_socket::{GenericNamespaced, Name, ToNsName}; use interprocess::local_socket::{ListenerOptions, Stream as LocalStream}; @@ -41,7 +41,7 @@ impl CommandManager { streams: &mut Vec, message: &MessageType, ) -> std::io::Result<()> { - let serialized = bincode::serialize(message).expect("Serialization failed"); + let serialized = rmp_serde::to_vec(message).expect("Serialization failed"); for stream in streams.iter_mut() { stream.write_all(&(serialized.len() as u32).to_le_bytes())?; stream.write_all(&serialized)?; @@ -74,7 +74,7 @@ impl CommandManager { let mut serialized = vec![0u8; length]; stream.read_exact(&mut serialized)?; let message: MessageType = - bincode::deserialize(&serialized).expect("Deserialization failed"); + rmp_serde::from_slice(&serialized).expect("Deserialization failed"); // Send acknowledgment stream.write_all(&[1])?; stream.flush()?; diff --git a/src/utils/config.rs b/src/utils/config.rs index d488838a..b6f71144 100644 --- a/src/utils/config.rs +++ b/src/utils/config.rs @@ -1,13 +1,32 @@ // src/utils/config.rs use crate::transfer::PdConfig; +use llguidance::api::TopLevelGrammar; #[cfg(feature = "python")] use 
pyo3::pyclass; use serde::de::value::SeqAccessDeserializer; use serde::de::{Deserializer, Visitor}; use serde::{Deserialize, Serialize, Serializer}; + use std::collections::HashMap; use std::fmt; +#[cfg(not(feature = "python"))] +impl SamplingParams { + /// Convert grammar to constraint for GuidanceState construction + /// Prioritizes constraint field, falls back to grammar field + pub fn to_constraint(&self) -> Option { + self.grammar.clone() + } +} + +#[cfg(feature = "python")] +impl SamplingParams { + /// Convert grammar to constraint for GuidanceState construction + pub fn to_constraint(&self) -> Option { + self.grammar.clone() + } +} + #[derive(Debug, Clone)] pub enum EosTokenId { Single(u32), @@ -263,6 +282,10 @@ pub struct EngineConfig { pub tool_prompt_template: Option, pub pd_server_prefix_cache_ratio: Option, pub pd_client_prefix_cache_ratio: Option, + /// Allow client-submitted constraints via HTTP API + pub allow_constraint_api: bool, + /// Whether to automatically build LLG grammar from tools + pub enable_tool_grammar: bool, } #[cfg(feature = "python")] @@ -340,6 +363,10 @@ pub struct EngineConfig { pub pd_server_prefix_cache_ratio: Option, #[pyo3(get, set)] pub pd_client_prefix_cache_ratio: Option, + #[pyo3(get, set)] + pub allow_constraint_api: bool, + #[pyo3(get, set)] + pub enable_tool_grammar: bool, } #[cfg(not(feature = "python"))] @@ -374,7 +401,9 @@ impl EngineConfig { tool_prompt_template: Option, pd_server_prefix_cache_ratio: Option, pd_client_prefix_cache_ratio: Option, - ) -> Self { + allow_constraint_api: bool, + enable_tool_grammar: bool, + ) -> Self { let mut device_ids = device_ids.unwrap_or_default(); if device_ids.is_empty() { device_ids.push(0); @@ -420,12 +449,14 @@ impl EngineConfig { pd_config, mcp_command, mcp_config, - mcp_args, - tool_prompt_template, - pd_server_prefix_cache_ratio, - pd_client_prefix_cache_ratio, - } - } + mcp_args, + tool_prompt_template, + pd_server_prefix_cache_ratio, + pd_client_prefix_cache_ratio, + 
allow_constraint_api, + enable_tool_grammar, + } + } } #[derive(Clone, Debug, serde::Deserialize)] @@ -435,7 +466,59 @@ pub struct TokenizerConfig { pub add_eos_token: Option, pub chat_template: Option, pub bos_token: Option, - pub eos_token: Option, + #[serde(deserialize_with = "eos_token_deserialize")] + pub eos_token: Option, +} + +/// Helper to deserialize EOS token which can be a string or a list of strings +fn eos_token_deserialize<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + use serde::Deserialize; + let opt = Option::::deserialize(deserializer)?; + match opt { + None => Ok(None), + Some(v) => { + if v.is_string() { + Ok(Some(EosTokenEntry::single(v.as_str().unwrap().to_string()))) + } else if v.is_array() { + let arr = v.as_array().unwrap(); + let tokens: Vec = arr + .iter() + .map(|x| x.as_str().unwrap().to_string()) + .collect(); + Ok(Some(EosTokenEntry::multiple(tokens))) + } else { + Err(serde::de::Error::custom("eos_token must be a string or array")) + } + } + } +} + +/// EOS token entry - can be single or multiple strings +#[derive(Debug, Clone)] +#[cfg_attr(feature = "python", pyclass)] +pub struct EosTokenEntry { + pub tokens: Vec, +} + +impl EosTokenEntry { + pub fn single(s: String) -> Self { + Self { tokens: vec![s] } + } + + pub fn multiple(tokens: Vec) -> Self { + Self { tokens } + } + + pub fn as_single(&self) -> Option<&str> { + if self.tokens.len() == 1 { + Some(&self.tokens[0]) + } else { + None + } + } } #[cfg(not(feature = "python"))] @@ -459,6 +542,9 @@ pub struct SamplingParams { /// If Some(true), external tools are enabled and stream finishes at . 
#[serde(default)] pub mcp_mode: Option, + /// Grammar constraint as TopLevelGrammar for RPC serialization + #[serde(default)] + pub grammar: Option, } #[cfg(feature = "python")] @@ -491,8 +577,13 @@ pub struct SamplingParams { #[pyo3(get, set)] pub mcp_mode: Option, #[pyo3(get, set)] - #[serde(alias = "enable_thinking")] pub thinking: Option, + /// Grammar constraint as TopLevelGrammar for RPC serialization + #[serde(default)] + pub grammar: Option, + /// Grammar constraint as JSON string for Python API + #[pyo3(get, set)] + pub grammar_json: Option, } #[cfg(not(feature = "python"))] @@ -520,6 +611,7 @@ impl SamplingParams { mcp_mode: None, stop_sequences: None, stop_token_ids: None, + grammar: None, thinking, } } @@ -537,11 +629,34 @@ impl SamplingParams { mcp_mode: None, stop_sequences: None, stop_token_ids: None, + grammar: None, + thinking: None, + } + } +} + +#[cfg(not(feature = "python"))] +impl Default for SamplingParams { + fn default() -> Self { + Self { + temperature: None, + max_tokens: Some(16384), + ignore_eos: false, + top_k: None, + top_p: None, + session_id: None, + frequency_penalty: None, + presence_penalty: None, + mcp_mode: None, + stop_sequences: None, + stop_token_ids: None, + grammar: None, thinking: None, } } } +#[cfg(feature = "python")] impl Default for SamplingParams { fn default() -> Self { Self { @@ -557,6 +672,8 @@ impl Default for SamplingParams { stop_sequences: None, stop_token_ids: None, thinking: None, + grammar: None, + grammar_json: None, } } } @@ -588,8 +705,8 @@ pub struct GenerationConfig { /// Randomness of sampling. /// rec. default = 1 pub temperature: Option, - /// Cumulative prob of the top tokens to consider, must be in (0, 1]. Set 1 to consider all toks. - /// rec. default = 1 + /// Cumulative prob of the top tokens to consider, must be in (0, 1]. Set 1 to consider all toks. + /// rec. default = 1 pub top_p: Option, /// Control the number of top tokens to consider, set -1 to consider all. /// rec. 
default = -1 diff --git a/src/utils/guidance.rs b/src/utils/guidance.rs index e37dbad8..15e3727f 100644 --- a/src/utils/guidance.rs +++ b/src/utils/guidance.rs @@ -1,54 +1,1291 @@ // src/utils/guidance.rs -//! Guided decoding support via llguidance. -//! -//! NOTE: This module is currently stubbed out due to API changes in llguidance >= 0.6. -//! The TopLevelGrammar::from_json_schema method is no longer available. -//! Guided decoding features are temporarily disabled. - -use serde_json::Value; -use std::path::Path; +use anyhow::Result; +use candle_core::Tensor; +use llguidance::{api::TopLevelGrammar, Matcher, ParserFactory as LlgParserFactory}; +use std::collections::HashMap; use std::sync::Arc; +use tokenizers::Tokenizer; +use crate::utils::special_tokens::SpecialTokens; +use toktrie::{SimpleVob, TokTrie}; +use toktrie_hf_tokenizers::{ByteTokenizer, ByteTokenizerEnv}; -// Import toktrie from the crate root (it's re-exported by llguidance) -pub use toktrie::TokTrie; +use crate::tools::Tool; +use crate::utils::logits_processor::{LogitsProcessor, Sampling}; +use serde_json::json; +use crate::tools::schema::ToolGrammarBuilder; + +/// Error type for grammar-related errors +#[derive(Debug, thiserror::Error)] +pub enum GrammarError { + #[error("structured_outputs must set exactly one of choice, regex, json, grammar, or structural_tag")] + TooManyConstraints, + + #[error("response_format.json_schema is required for type=json_schema")] + MissingJsonSchema, + + #[error("unsupported response_format type: {0}")] + UnsupportedFormat(String), + + #[error("invalid grammar: {0}")] + InvalidGrammar(String), + + #[error("tool grammar construction failed: {0}")] + ToolGrammarError(String), +} + +pub type GrammarResult = Result; + +/// Builder for structured output constraint grammars +pub struct ConstraintBuilder { + choice: Option>, + regex: Option, + json: Option, + grammar: Option, + structural_tag: Option, +} + +impl ConstraintBuilder { + pub fn new() -> Self { + Self { + 
choice: None, + regex: None, + json: None, + grammar: None, + structural_tag: None, + } + } + + pub fn choice(mut self, choice: Vec) -> Self { + self.choice = Some(choice); + self + } + + pub fn regex(mut self, regex: String) -> Self { + self.regex = Some(regex); + self + } + + pub fn json(mut self, json: serde_json::Value) -> Self { + self.json = Some(json); + self + } + + pub fn grammar(mut self, grammar: String) -> Self { + self.grammar = Some(grammar); + self + } + + pub fn structural_tag(mut self, tag: serde_json::Value) -> Self { + self.structural_tag = Some(tag); + self + } + + pub fn build(self) -> Result> { + let mut selected: Option = None; + let mut constraint_count = 0; + + if let Some(choice) = self.choice { + constraint_count += 1; + if constraint_count > 1 { + return Err(anyhow::Error::msg("structured_outputs must set exactly one of choice, regex, json, grammar, or structural_tag")); + } + let choice_gram = crate::tools::schema::build_choice_lark_grammar(&choice) + .map_err(|e| anyhow::Error::msg(e))?; + selected = Some(choice_gram); + } + + if let Some(regex) = self.regex { + constraint_count += 1; + if constraint_count > 1 { + return Err(anyhow::Error::msg("structured_outputs must set exactly one of choice, regex, json, grammar, or structural_tag")); + } + let regex_gram = TopLevelGrammarExt::from_regex_ascii(®ex); + selected = Some(regex_gram); + } + + if let Some(schema) = self.json { + constraint_count += 1; + if constraint_count > 1 { + return Err(anyhow::Error::msg("structured_outputs must set exactly one of choice, regex, json, grammar, or structural_tag")); + } + let schema = crate::tools::schema::sanitize_schema_for_llguidance(&schema); + let json_gram = TopLevelGrammarExt::from_json_schema_utf8(schema) + .map_err(|e| anyhow::Error::msg(e.to_string()))?; + selected = Some(json_gram); + } + + if let Some(grammar) = self.grammar { + constraint_count += 1; + if constraint_count > 1 { + return Err(anyhow::Error::msg("structured_outputs must set 
exactly one of choice, regex, json, grammar, or structural_tag")); + } + let lark_gram = TopLevelGrammarExt::from_lark_utf8(&grammar); + selected = Some(lark_gram); + } + + if let Some(tag) = self.structural_tag { + constraint_count += 1; + if constraint_count > 1 { + return Err(anyhow::Error::msg("structured_outputs must set exactly one of choice, regex, json, grammar, or structural_tag")); + } + let (start, end, schema) = crate::tools::schema::parse_structural_tag(&tag) + .map_err(|e| anyhow::Error::msg(e))?; + let schema = crate::tools::schema::sanitize_schema_for_llguidance(&schema); + let tools = crate::tools::schema::schema_to_tools(&schema); + let tool_gram = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag(&start) + .end_tag(&end) + .start_is_special(false) + .end_is_special(false) + .build_json(); + selected = Some(tool_gram); + } + + if selected.is_none() { + return Err(anyhow::Error::msg("structured_outputs must set exactly one of choice, regex, json, grammar, or structural_tag")); + } + + Ok(selected) + } +} + +/// Builder for composing multiple grammars with alternation +/// This provides a more readable, declarative way to build composed grammars +pub struct GrammarBuilder { + alternatives: Vec, + max_tokens: Option, +} + +impl GrammarBuilder { + pub fn new() -> Self { + Self { + alternatives: Vec::new(), + max_tokens: None, + } + } + + pub fn alternative(mut self, grammar: TopLevelGrammar) -> Self { + self.alternatives.push(grammar); + self + } + + pub fn max_tokens(mut self, tokens: usize) -> Self { + self.max_tokens = Some(tokens); + self + } + + pub fn build(self) -> TopLevelGrammar { + // Note: GrammarBuilder currently uses chat_text_expression() without EOS tokens + // EOS token support is provided through compose_grammars() directly + match self.alternatives.len() { + 0 => { + let lark = chat_text_expression(); + TopLevelGrammar::from_lark_utf8(&lark) + } + 1 => { + let mut gram = self.alternatives.into_iter().next().unwrap(); + 
/// Returns `s` with every non-ASCII character dropped.
///
/// The result contains only the ASCII characters of the input, in their
/// original order.
pub fn sanitize_to_ascii(s: &str) -> String {
    s.chars().filter(char::is_ascii).collect()
}

/// Returns `s` with control characters removed, keeping `\n`, `\r` and `\t`.
///
/// All other characters, including non-ASCII text, pass through unchanged.
pub fn sanitize_utf8_valid(s: &str) -> String {
    s.chars()
        .filter(|ch| !ch.is_control() || matches!(ch, '\n' | '\r' | '\t'))
        .collect()
}
/// Splits a Lark grammar into the right-hand side of its `start:` rule and
/// all remaining lines.
///
/// The first line whose trimmed form begins with `start:` is taken as the
/// start rule, wherever it appears. Its right-hand side is returned trimmed
/// but otherwise verbatim, so `|` characters inside string or regex literals
/// are not rewritten. Every other line is returned unchanged, in order.
///
/// When no `start:` line exists, the whole input is returned as the start
/// right-hand side with no other rules. An empty input therefore yields
/// `("", [])`.
fn parse_lark_grammar(lark: &str) -> (String, Vec<String>) {
    let mut start_rhs: Option<String> = None;
    let mut other_rules: Vec<String> = Vec::new();

    for line in lark.lines() {
        if start_rhs.is_none() {
            if let Some(rhs) = line.trim().strip_prefix("start:") {
                start_rhs = Some(rhs.trim().to_string());
                continue;
            }
        }
        other_rules.push(line.to_string());
    }

    match start_rhs {
        Some(rhs) => (rhs, other_rules),
        None => (lark.to_string(), Vec::new()),
    }
}

/// Combines grammar rule lines, merging duplicate rule names by alternation.
///
/// * Output order is deterministic: each rule appears where its name was
///   first seen.
/// * Repeated definitions of one rule are joined with ` | `; identical
///   bodies are kept only once.
/// * Blank lines are dropped.
/// * Lines without a `:` and directive lines starting with `%` are emitted
///   verbatim (trimmed) rather than being wrapped in a synthetic rule.
fn combine_rules(rules: Vec<String>) -> String {
    use std::collections::HashMap;

    enum Entry {
        Rule { name: String, bodies: Vec<String> },
        Verbatim(String),
    }

    let mut entries: Vec<Entry> = Vec::new();
    // Maps a rule name to its slot in `entries` so first-seen order is kept.
    let mut rule_index: HashMap<String, usize> = HashMap::new();

    for raw in &rules {
        let line = raw.trim();
        if line.is_empty() {
            continue;
        }

        let split = if line.starts_with('%') {
            None
        } else {
            line.split_once(':')
        };

        match split {
            None => entries.push(Entry::Verbatim(line.to_string())),
            Some((name, body)) => {
                let name = name.trim();
                let body = body.trim();
                let existing = rule_index.get(name).copied();
                match existing {
                    Some(slot) => {
                        if let Entry::Rule { bodies, .. } = &mut entries[slot] {
                            if !bodies.iter().any(|known| known == body) {
                                bodies.push(body.to_string());
                            }
                        }
                    }
                    None => {
                        rule_index.insert(name.to_string(), entries.len());
                        entries.push(Entry::Rule {
                            name: name.to_string(),
                            bodies: vec![body.to_string()],
                        });
                    }
                }
            }
        }
    }

    entries
        .into_iter()
        .map(|entry| match entry {
            Entry::Rule { name, bodies } => format!("{}: {}", name, bodies.join(" | ")),
            Entry::Verbatim(line) => line,
        })
        .collect::<Vec<String>>()
        .join("\n")
}
start_alternation, combined_rules); + + let mut top_gram = TopLevelGrammar::from_lark(final_grammar); + top_gram.max_tokens = max_tokens; + top_gram +} + +/// Extract the Lark grammar string from TopLevelGrammar for debugging +pub fn get_lark_from_top_level_grammar(gram: &TopLevelGrammar) -> String { + if gram.grammars.is_empty() { + return "No grammars".to_string(); + } + let larks: Vec = gram.grammars.iter() + .filter_map(|g| g.lark_grammar.as_ref()) + .map(|s| s.clone()) + .collect(); + if larks.is_empty() { + format!("{} grammars, none have lark_grammar", gram.grammars.len()) + } else { + larks.join("\n---\n") + } +} + +/// Lark grammar TEXT pattern for common UTF-8 printable characters +/// Excludes control characters (0x00-0x1F), DEL (0x7F), and C1 controls (0x80-0x9F) +/// This pattern allows: +/// - ASCII printable: space (0x20) through tilde (0x7E) +/// - Unicode text: 0x80 onwards (Latin extended, accented chars, CJK, emoji, etc.) +/// - Common whitespace: newline, carriage return, tab +/// +/// ## Binary Token Matching with llguidance Matcher +/// +/// When working with Qwen-style tool tokens (e.g., `<‌tool_call>`), llguidance uses +/// a **byte-level lexer approach** with the following key concepts: +/// +/// ### 1. Token-Based, Not Byte-Based +/// The `Matcher.compute_mask()` returns a [`SimpleVob`](toktrie::SimpleVob) - a bit vector +/// where each bit represents whether a **token ID** is allowed. This is pre-computed +/// against the tokenizer's trie. +/// +/// ### 2. Special Token Marker (0xFF) +/// llguidance uses byte `0xFF` (TokTrie::SPECIAL_TOKEN_MARKER) to prefix special tokens +/// like `<|end_of_text|>`, `<|eot_id|>`, etc. This is because: +/// - `0xFF` is not valid UTF-8, so it never appears in regular text +/// - In Rust: `&[u8]` can contain 0xFF, but `&str` cannot +/// - Tokenizers like Qwen may embed special tokens as bytes like `[\xFF, b'[', b'1', b'2', b']']` +/// +/// ### 3. 
/// Quotes `value` as a Lark string literal.
///
/// Non-ASCII characters are dropped, then backslashes and double quotes are
/// escaped and the result is wrapped in double quotes.
fn lark_quote(value: &str) -> String {
    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push('"');
    for ch in value.chars().filter(char::is_ascii) {
        match ch {
            '\\' => quoted.push_str("\\\\"),
            '"' => quoted.push_str("\\\""),
            other => quoted.push(other),
        }
    }
    quoted.push('"');
    quoted
}

/// Builds the Lark expression for a special token.
///
/// With no token ids, `fallback` is emitted as a quoted string literal.
/// Otherwise the ids are emitted in llguidance's numeric token syntax:
/// `<[id]>` for one id and `<[id1,id2,...]>` for several. The previous
/// `<[id1],[id2]>` rendering for several ids is not a valid token-id list.
/// Ids are sorted ascending so the generated grammar text is identical
/// across runs regardless of `HashSet` iteration order.
pub fn build_special_token_tag(token_ids: &std::collections::HashSet<u32>, fallback: &str) -> String {
    if token_ids.is_empty() {
        // No ids known: fall back to matching the tag text itself.
        return lark_quote(fallback);
    }

    let mut ids: Vec<u32> = token_ids.iter().copied().collect();
    ids.sort_unstable();

    let id_list = ids
        .iter()
        .map(|id| id.to_string())
        .collect::<Vec<String>>()
        .join(",");
    format!("<[{}]>", id_list)
}
fn build_tool_call_tag(start_token_ids: &std::collections::HashSet, start_token_str: &str) -> String { + build_special_token_tag(start_token_ids, start_token_str) +} + +/// Build tool call end tag using token IDs when available +pub fn build_tool_call_end_tag(end_token_ids: &std::collections::HashSet, end_token_str: &str) -> String { + build_special_token_tag(end_token_ids, end_token_str) +} + +/// Build TEXT pattern with explicit EOS token IDs using <[id]> syntax +/// The EOS tokens are alternated as optional termination: TEXT eos? +pub fn chat_text_expression_with_eos(special_tokens: &SpecialTokens) -> String { + let eos_token_ids = special_tokens.eos_ids(); + + // First check environment variable override + if let Ok(val) = std::env::var("VLLM_LLG_DEFAULT_TEXT") { + return format!("{}", val); + } + + // Build EOS alternation pattern using <[id]> syntax for token IDs + // LHS must be lowercase - literal tokens aren't allowed in TERMINAL rules + let eos_pattern = if eos_token_ids.is_empty() { + // Fallback to stop="" when no EOS tokens available + r#"start: text +text[stop=""]: /((?s).*?)/"#.to_string() + } else if eos_token_ids.len() == 1 { + format!(r#"start: text_with_eos +text_with_eos: TEXT eos? +TEXT: /(?s:.*)/ +eos: <[{}]>"#, eos_token_ids[0]) + } else { + let ids: Vec = eos_token_ids.iter().map(|id| format!("<[{}]>", id)).collect(); + let eos_alternation = ids.join(" | "); + format!(r#"start: text_with_eos +text_with_eos: TEXT eos? 
+TEXT: /(?s:.*)/ +eos: {}"#, eos_alternation) + }; + + eos_pattern +} + +/// Build TEXT pattern with stop="" attribute for proper EOS bounding +/// The stop="" attribute sets ends_at_eos: true so the parser can terminate at EOS +/// The [lazy] syntax is for rules, not terminals - options go AFTER the rule name, BEFORE the colon +pub fn chat_text_expression() -> String { + // First check environment variable override + if let Ok(val) = std::env::var("VLLM_LLG_DEFAULT_TEXT") { + return format!("{}", val); + } + + // Use a rule (lowercase) with stop="" attribute for proper EOS termination + // The stop="" tells llguidance to allow EOS token as a valid termination point + // Options go after the rule name, before the colon: text[stop=""]: /pattern/ + r#"start: text +text[stop=""]: /((?s).*?)/"#.to_string() +} + +/// Build grammar vec based on constraint and tool presence +/// Returns a Vec where the first element gets the start: rule +pub fn build_grammar_vec( + constraint_grammars: Vec, + tool_grammar: Option, + tool_choice_required: bool, +) -> Vec { + match (constraint_grammars.is_empty(), tool_grammar.is_some(), tool_choice_required) { + // No constraints, no tools → text only + (true, false, _) => { + let lark_exp = format!("start: text\ntext[stop=\"\"]: /((?s).*?)/"); + vec![TopLevelGrammar::from_lark(lark_exp)] + }, + + // No constraints, tools optional → TEXT | tool_call + (true, true, false) => { + let mut grammars = constraint_grammars; + grammars.push(tool_grammar.unwrap()); + grammars + } + + // No constraints, tools required → tool_call only + (true, true, true) => { + vec![tool_grammar.unwrap()] + } + + // Constraints present, no tools → constraint only + (false, false, _) => constraint_grammars, + + // Constraints present, tools optional → constraint | tool_call + (false, true, false) => { + let mut grammars = constraint_grammars; + grammars.push(tool_grammar.unwrap()); + grammars + } + + // Constraints present, tools required → constraint | tool_call + 
(false, true, true) => { + let mut grammars = constraint_grammars; + grammars.push(tool_grammar.unwrap()); + grammars + } + } +} + +/// Compose grammars based on constraint and tool settings +/// Returns a single TopLevelGrammar with proper precedence +/// This function takes the grammar that was built externally (with appropriate model-specific format) +/// and handles the alternation/composition logic +pub fn compose_grammars( + mut constraint_grammars: Vec, + tool_grammar: Option, + has_tools: bool, + tool_choice_required: bool, + forced_tool_name: Option, + max_tokens: Option, + special_tokens: &SpecialTokens, +) -> TopLevelGrammar { + crate::log_debug!("[llg] compose_grammars() called: constraints={:?}", constraint_grammars.len()); + crate::log_debug!("[llg] compose_grammars(): has_tools={}, tool_choice_required={}, forced_tool_name={:?}", has_tools, tool_choice_required, forced_tool_name); + + match ( + constraint_grammars.is_empty(), + tool_grammar.is_some(), + tool_choice_required, + forced_tool_name.is_some(), + ) { + // No constraint, no tools → text with EOS bounding + (true, false, _, _) => { + // Build TEXT pattern with explicit EOS token IDs + // This generates: start: text_with_eos, text_with_eos: TEXT eos?, TEXT: /pattern/, eos: <[id]> + let lark = chat_text_expression_with_eos(special_tokens); + crate::log_debug!("[llg] compose_grammars() -> text with EOS: {}", &lark); + TopLevelGrammar::from_lark_utf8(&lark) + } + + // No constraint, tools optional → tool_call | text with EOS + (true, true, false, false) => { + // Build text grammar with explicit EOS token IDs + let lark = chat_text_expression_with_eos(special_tokens); + let text_gram = TopLevelGrammar::from_lark(lark); + let tool_gram = tool_grammar.unwrap(); + let start_sep = "|".to_string(); + let merged = merge_top_level_grammars(vec![text_gram, tool_gram], max_tokens, Some(start_sep)); + crate::log_debug!("[llg] compose_grammars() -> ( text with EOS | tool_call )+"); + merged + } + + // No 
constraint, tools required → tool_call only + (true, true, true, _) => { + let tool_gram = tool_grammar.unwrap(); + crate::log_debug!("[llg] compose_grammars() -> tool_call only (tools required)"); + tool_gram + } + + // No constraint, tools optional, specific tool forced → tool_call only + (true, true, false, true) => { + let tool_gram = tool_grammar.unwrap(); + crate::log_debug!("[llg] compose_grammars() -> tool_call only (forced tool: {})", forced_tool_name.unwrap()); + tool_gram + } + + // Constraint only, no tools → constraint only + (false, false, _, _) => { + let constraint_gram = constraint_grammars.remove(0); + crate::log_debug!("[llg] compose_grammars() -> constraint only"); + constraint_gram + } + + // Constraint only, tools optional → tool_call | constraint + (false, true, false, false) => { + // Build combined grammar with constraint and tool_call + let constraint_gram = constraint_grammars.remove(0); + let tool_gram = tool_grammar.unwrap(); + // Build the merged grammar with constraint | tool_call + // Use merge_top_level_grammars with None separator (|) + merge_top_level_grammars(vec![constraint_gram, tool_gram], max_tokens, None) + } + + // Constraint only, tools required → tool_call | constraint + (false, true, true, _) => { + let constraint_gram = constraint_grammars.remove(0); + let tool_gram = tool_grammar.unwrap(); + merge_top_level_grammars(vec![constraint_gram, tool_gram], max_tokens, None) + } + + // Constraint only, specific tool forced → tool_call | constraint + (false, true, false, true) => { + let constraint_gram = constraint_grammars.remove(0); + let tool_gram = tool_grammar.unwrap(); + merge_top_level_grammars(vec![constraint_gram, tool_gram], max_tokens, None) + } + } +} + +pub type ParserFactory = LlgParserFactory; + +pub fn build_llg_factory( + tokenizer: Tokenizer, + vocab_size: Option, +) -> Result> { + let tokenizer_vocab = tokenizer.get_vocab_size(true); + let target_vocab = vocab_size.map(|v| { + if v < tokenizer_vocab { + 
/// Whitespace regex literal for Lark grammars.
///
/// Matches one or more spaces, tabs, carriage returns or newlines. The
/// returned text is `/[ \t\r\n]+/` with single backslashes, so the regex
/// engine sees the escapes `\t`, `\r` and `\n`. The previous value doubled
/// each backslash, which made the class match a literal backslash and the
/// letters `t`, `r`, `n` instead of whitespace.
pub fn lark_ws_regex() -> &'static str {
    "/[ \\t\\r\\n]+/"
}
{end}\njson_array: \"[\" obj (\",\" obj)* \"]\"\nobj:\n_WS: {}\n{}", lark_ws_regex(), obj_rules.trim_end()) +} + +/// Cache for precomputed mask slices to avoid expensive re-computation +#[derive(Clone, Default)] +pub struct SlicerCache { + cache: HashMap>, +} + +impl SlicerCache { + /// Return the cached mask slice for `pos`, computing and storing it on first use. + /// + /// `compute_fn` is invoked only when `pos` has no entry yet; later calls with the + /// same `pos` return the stored slice without calling it. + pub fn get_or_compute(&mut self, pos: usize, compute_fn: impl FnOnce() -> Vec) -> &Vec { + // Entry API: one hash lookup instead of three, and no `expect` panic path. + self.cache.entry(pos).or_insert_with(compute_fn) + } + + /// Clear the cache + pub fn clear(&mut self) { + self.cache.clear(); + } +} pub struct GuidanceState { - // Placeholder for future implementation - _phantom: std::marker::PhantomData<()>, + matcher: Matcher, + /// Track llm tokens for speculative decoding recovery + llm_tokens: Vec, + /// Track llm bytes for rollback calculations + llm_bytes: usize, + /// Cache for precomputed mask slices + slicer_cache: SlicerCache, } impl GuidanceState { - pub fn new(_toktrie: Arc, _schema: Value) -> anyhow::Result { - // Stubbed out - guided decoding temporarily disabled - anyhow::bail!("Guided decoding is temporarily disabled due to llguidance API changes. 
\ - The TopLevelGrammar::from_json_schema method is no longer available in llguidance >= 0.6") + pub fn new_from_grammar(factory: Arc, grammar: &TopLevelGrammar) -> Result { + crate::log_debug!("[llg] GuidanceState::new_from_grammar() called"); + crate::log_trace!("[llg] Creating parser from grammar"); + let parser = factory.create_parser(grammar.clone())?; + crate::log_trace!("[llg] Creating Matcher from parser"); + let matcher = Matcher::new(Ok(parser)); + crate::log_info!("[llg] GuidanceState created successfully for grammar"); + + Ok(Self { + matcher, + llm_tokens: Vec::new(), + llm_bytes: 0, + slicer_cache: SlicerCache::default(), + }) + } + + /// Compute the allowed-token mask for the next position. + /// + /// Returns `Ok(None)` once the matcher has stopped; otherwise asks the matcher for a + /// fresh mask on every call (this method does not consult `slicer_cache`). + pub fn compute_mask(&mut self) -> Result> { + crate::log_trace!("[llg] compute_mask() called"); + + if self.matcher.is_stopped() { + crate::log_trace!("[llg] compute_mask() - matcher stopped, returning None"); + return Ok(None); + } + let mask = self.matcher.compute_mask()?; + // `len()` is the size of the bit vector; `num_set()` is the count of allowed tokens. + crate::log_trace!("[llg] compute_mask() - mask computed with {} valid tokens", mask.num_set()); + Ok(Some(mask)) + } + + /// Commit token and track for speculative decoding recovery + pub fn commit_token(&mut self, token: u32) -> Result<()> { + crate::log_trace!("[llg] commit_token(token={})", token); + + if !self.matcher.is_stopped() { + self.matcher.consume_token(token)?; + crate::log_trace!("[llg] Token {} consumed successfully", token); + self.llm_tokens.push(token); + self.llm_bytes += 4; + } else { + crate::log_trace!("[llg] commit_token() - matcher stopped, skipping"); + } + Ok(()) + } + + /// Get the number of committed tokens + pub fn num_tokens(&self) -> usize { + self.llm_tokens.len() + } + + /// Get the number of committed bytes + pub fn num_bytes(&self) -> usize { + self.llm_bytes + } + + /// Check if guidance is finished + pub fn is_finished(&self) -> bool { + self.matcher.is_stopped() + } + + /// Get the last committed token + pub fn last_token(&self) -> Option { + 
self.llm_tokens.last().copied() + } + + /// Validate token without consuming it (for re-sampling) + pub fn validate_token(&mut self, token: u32) -> bool { + if self.matcher.is_stopped() { + return true; + } + let result = self.matcher.validate_tokens(&[token]).unwrap_or(0); + let is_valid = result == 1; + if !is_valid { + crate::log_debug!("[llg] Token {} rejected by grammar", token); + } + is_valid + } + + /// Compute mask or return EOS token set if stopped + pub fn compute_mask_or_eos(&mut self) -> Result { + self.matcher.compute_mask_or_eos().map_err(Into::into) + } + + /// Fast-forward tokens without consuming them (for speculative decoding) + pub fn compute_ff_tokens(&mut self) -> Vec { + if self.matcher.is_stopped() { + return Vec::new(); + } + self.matcher.compute_ff_tokens() + } + + /// Fast-forward and consume tokens guaranteed to be accepted by the grammar + pub fn consume_ff_tokens(&mut self) -> Result, anyhow::Error> { + crate::log_debug!("[llg] consume_ff_tokens() called"); + + if self.matcher.is_stopped() { + crate::log_trace!("[llg] consume_ff_tokens() - matcher stopped, returning empty"); + return Ok(Vec::new()); + } + + let ff_tokens = self.matcher.compute_ff_tokens(); + crate::log_debug!("[llg] compute_ff_tokens() returned {} tokens", ff_tokens.len()); + + for &token in &ff_tokens { + crate::log_trace!("[llg] Consuming FF token {}", token); + self.matcher.consume_token(token)?; + self.llm_tokens.push(token); + self.llm_bytes += 4; + } + + crate::log_debug!("[llg] consume_ff_tokens() - successfully consumed {} tokens", ff_tokens.len()); + Ok(ff_tokens) + } + + /// Check if there are pending lexeme bytes to be consumed + pub fn has_pending_lexeme_bytes(&self) -> bool { + false + } + + /// Rollback to a previous state with byte tracking + pub fn rollback_to(&mut self, token_pos: usize, byte_pos: usize) -> Result<()> { + let tokens_to_rollback = self.llm_tokens.len().saturating_sub(token_pos); + if tokens_to_rollback > 0 { + 
self.matcher.rollback(tokens_to_rollback)?; + } + self.llm_tokens.truncate(token_pos); + self.llm_bytes = byte_pos; + Ok(()) + } + + /// Capture current state as rollback snapshot + pub fn capture_snapshot(&mut self) { } - pub fn compute_allowed_tokens(&mut self) -> anyhow::Result { - anyhow::bail!("Guided decoding is temporarily disabled") + /// Clear all state + pub fn clear(&mut self) { + self.llm_tokens.clear(); + self.llm_bytes = 0; + self.slicer_cache.clear(); } - pub fn commit_token(&mut self, _token: u32) -> anyhow::Result<()> { - anyhow::bail!("Guided decoding is temporarily disabled") + /// Get a reference to the slicer cache + pub fn slicer_cache(&mut self) -> &mut SlicerCache { + &mut self.slicer_cache + } + + /// Validate a sequence of tokens against the grammar + pub fn validate_tokens(&mut self, tokens: &[u32]) -> Option { + if self.matcher.is_stopped() { + return Some(tokens.len()); + } + match self.matcher.validate_tokens(tokens) { + Ok(count) => Some(count), + Err(_) => None, + } } } -pub struct AllowedTokens { - pub tokens: Vec, - pub is_stopped: bool, +/// Apply sparse mask bias to logits +/// Uses iter_set_entries to only iterate allowed tokens +/// +/// Builds a `(masks.len(), vocab_size)` bias that is `0.0` for allowed tokens and `-inf` +/// elsewhere, then adds it to `logits`. +/// +/// # Errors +/// Returns an error if a mask's batch index is not below `masks.len()`, or if tensor +/// construction / addition fails. +/// +/// NOTE(review): the bias has one row per mask, so this presumably expects `masks` to +/// cover every row of `logits` with indices `0..masks.len()` - confirm against callers. +pub fn _batch_mask_bias( + logits: &Tensor, + masks: &[(usize, SimpleVob)], + vocab_size: usize, +) -> candle_core::Result { + let batch_size = masks.len(); + + // Create bias vector initialized to -inf + let mut bias_data = vec![f32::NEG_INFINITY; batch_size * vocab_size]; + + // Fill in allowed tokens using sparse iteration + // masks is Vec<(batch_idx, SimpleVob)> where batch_idx is the sequence position in the batch + for (batch_idx, mask) in masks.iter() { + // An index outside 0..batch_size would write past `bias_data` and panic; + // surface it as an error instead. + if *batch_idx >= batch_size { + return Err(candle_core::Error::Msg(format!( + "[llg] mask batch index {} out of range for batch size {}", + batch_idx, batch_size + ))); + } + let row_start = *batch_idx * vocab_size; + mask.iter_set_entries(|idx| { + if idx < vocab_size { + bias_data[row_start + idx] = 0.0; + } + }); + } + + // Create bias tensor on same device as logits + let bias_tensor = Tensor::from_vec(bias_data, (batch_size, vocab_size), logits.device())?; + + // GPU tensor addition (no CPU copy) + logits.broadcast_add(&bias_tensor) } -pub 
fn build_toktrie_from_tokenizer_bytes(bytes: &[u8]) -> anyhow::Result { - // Try to build TokTrie from bytes - // The new API uses TokTrie::from() with TokRxInfo and words - // For now, return an error as the exact migration path needs investigation - anyhow::bail!("TokTrie construction from tokenizer bytes is temporarily disabled. \ - The TokTrie::from_huggingface_bytes method is no longer available in toktrie >= 1.0. \ - Input bytes length: {}", bytes.len()) +/// Two-stage validation with early exit +/// Stage 1: Sample and validate token +/// Stage 2: Only compute mask if token is invalid +pub fn _early_exit_validate( + guidance_states: &mut HashMap, + seq_ids: &[usize], + tokens: &mut [u32], + logits: &Tensor, + vocab_size: usize, + _factory: &Arc, + sampling: &Sampling, + logit_processor: &LogitsProcessor, +) -> candle_core::Result<()> { + for (seq_idx, seq_id) in seq_ids.iter().enumerate() { + let token = tokens[seq_idx]; + + if let Some(state) = guidance_states.get_mut(seq_id) { + // Stage 1: Validate token + if state.validate_token(token) { + // Early exit - token is valid, consume it + state.commit_token(token).map_err(|e| candle_core::Error::Msg(e.to_string()))?; + continue; + } + + crate::log_debug!("[llg] Token {} is invalid, computing mask for seq {}", token, seq_id); + + // Stage 2: Token is invalid, compute mask and re-sample + let mask = match state.compute_mask_or_eos() { + Ok(m) => m, + Err(e) => { + crate::log_error!("[llg] Unable to compute mask for token {} due to {}", token, e); + continue; + } + }; + + crate::log_debug!("[llg] Applying bias to logits for seq {}", seq_id); + + // Build bias vector using sparse iteration + let mut acc = vec![f32::NEG_INFINITY; vocab_size]; + mask.iter_set_entries(|idx| { + if idx < acc.len() { + acc[idx] = 0.0; + } + }); + + // Get current sequence's logits as 1D tensor - MUST CLONE to avoid cross-contamination + let row_start = seq_idx * vocab_size; + let row_end = row_start + vocab_size; + let logits_vec = 
logits.flatten_all()?.to_vec1::()?; + // Copy only this sequence's row: `logits_vec` is already a host-side copy, so the + // original tensor cannot be modified and cloning the whole batch is unnecessary. + let mut row_vec = logits_vec[row_start..row_end].to_vec(); + + // Apply bias directly to this sequence's row: anything the mask did not allow becomes -inf + for (logit, bias) in row_vec.iter_mut().zip(acc.iter()) { + if *bias != 0.0 { + *logit = f32::NEG_INFINITY; + } + } + + // Create 1D tensor for just this sequence + let biased_row = Tensor::from_vec(row_vec, (vocab_size,), logits.device())?; + + // Re-sample just this sequence from the biased 1D logits + let re_sampled = logit_processor.sample_with_strategy(&biased_row, sampling)?; + tokens[seq_idx] = re_sampled[0]; // 1D output, first (only) element + + crate::log_debug!("[llg] Consuming re-sampled token {} for seq {}", tokens[seq_idx], seq_id); + + // Commit the re-sampled token + state.commit_token(tokens[seq_idx]).map_err(|e| candle_core::Error::Msg(e.to_string()))?; + } else { + crate::log_debug!("[llg] No guidance state for seq {}", seq_id); + } + } + + Ok(()) } -pub fn load_toktrie_from_path(_: &Path) -> Option { - // Temporarily disabled - returns None - // crate::log_warn!("load_toktrie_from_path is disabled: {:?}", path); - None +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_sanitize_to_ascii() { + let input = "hello"; + let sanitized = sanitize_to_ascii(input); + assert_eq!(sanitized, "hello"); + } + + #[test] + fn test_sanitize_utf8_valid() { + let input = "hello\x00\x01world"; + let sanitized = sanitize_utf8_valid(input); + assert_eq!(sanitized, "helloworld"); + } + + #[test] + fn test_grammar_builder_single_alternative() { + let grammar = GrammarBuilder::new() + .alternative(TopLevelGrammar::from_lark("start: 'a'".to_string())) + .build(); + assert!(grammar.grammars.len() > 0); + } + + #[test] + fn test_grammar_builder_multiple_alternatives() { + let grammar = GrammarBuilder::new() + .alternative(TopLevelGrammar::from_lark("start: 'a'".to_string())) + .alternative(TopLevelGrammar::from_lark("start: 'b'".to_string())) + 
.build(); + let lark_str = get_lark_from_top_level_grammar(&grammar); + assert!(lark_str.contains("start: ( 'a' | 'b' )+"), "Expected direct alternation"); + } + + #[test] + fn test_grammar_builder_with_max_tokens() { + let grammar = GrammarBuilder::new() + .alternative(TopLevelGrammar::from_lark("start: 'test'".to_string())) + .max_tokens(100) + .build(); + assert_eq!(grammar.max_tokens, Some(100)); + } + + #[test] + fn test_grammar_builder_default_text() { + let grammar = GrammarBuilder::new().build(); + let lark_str = get_lark_from_top_level_grammar(&grammar); + assert!(lark_str.contains("start: text"), "Expected default text pattern"); + } + + #[test] + fn test_merge_top_level_grammars_direct_alternation() { + // Test that merge_top_level_grammars produces direct alternation without rule_N indirection + let gram1 = TopLevelGrammar::from_lark("start: 'a'".to_string()); + let gram2 = TopLevelGrammar::from_lark("start: 'b'".to_string()); + // Use None for default separator (|) + let result = merge_top_level_grammars(vec![gram1, gram2], None, None); + + // Get the combined Lark string + let lark_str = get_lark_from_top_level_grammar(&result); + + // Verify that start: directly alternates 'a' | 'b' without rule_N indirection + assert!(lark_str.contains("start: ( 'a' | 'b' )+"), "Expected direct alternation in start rule: {}", lark_str); + // Verify that rule_N indirection is NOT present + assert!(!lark_str.contains("rule_0:"), "Should not contain rule_0 indirection"); + assert!(!lark_str.contains("rule_1:"), "Should not contain rule_1 indirection"); + } + + #[test] + fn test_merge_top_level_grammars_with_text_and_tool() { + // Test the actual TEXT | tool_call scenario from the issue + let lark = format!("start: TEXT\n{}", chat_text_expression()); + let text_gram = TopLevelGrammar::from_lark(lark); + let tool_gram = TopLevelGrammar::from_lark("start: tool_call\ntool_call: \"test\"".to_string()); + // Use None for default separator (|) + let result = 
merge_top_level_grammars(vec![text_gram, tool_gram], None, None); + + // Get the combined Lark string + let lark_str = get_lark_from_top_level_grammar(&result); + + // Verify that start: directly alternates TEXT | tool_call + assert!(lark_str.contains("start: ( TEXT | tool_call )+"), "Expected direct alternation: {}", lark_str); + // Verify that rule_N indirection is NOT present + assert!(!lark_str.contains("rule_0:"), "Should not contain rule_0 indirection"); + assert!(!lark_str.contains("rule_1:"), "Should not contain rule_1 indirection"); + } + + #[test] + fn test_merge_top_level_grammars_with_grammar_without_start() { + // Verify that when merging a grammar without start: line, it gets properly handled + let gram1 = TopLevelGrammar::from_lark("start: 'a'\n'a': 'a'".to_string()); + let gram2 = TopLevelGrammar::from_lark("'tool': 'call'\ntool: %json {\"type\":\"object\"}".to_string()); + // Use None for default separator (|) + let result = merge_top_level_grammars(vec![gram1, gram2], None, None); + + // Get the combined Lark string + let lark_str = get_lark_from_top_level_grammar(&result); + + // Should still have direct alternation at start + assert!(lark_str.contains("start:"), "Expected start rule in merged grammar"); + // The tool grammar should be properly included + assert!(lark_str.contains("'tool': 'call'"), "Expected tool content in merged grammar"); + } +} + +#[cfg(test)] +mod tool_grammar_builder_tests { + use super::*; + use crate::tools::ToolBuilder; + use std::collections::HashSet; + + #[test] + fn test_tool_grammar_builder_json_single_tool() { + let tools = vec![ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query".to_string(), "string".to_string(), "Search query".to_string(), true) + .build()]; + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("".to_string()) + .end_tag("".to_string()) + .start_is_special(false) + .end_is_special(false) + .build_json(); + assert!(grammar.grammars.len() > 0); + 
} + + #[test] + fn test_tool_grammar_builder_json_multiple_tools() { + let tools = vec![ + ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query".to_string(), "string".to_string(), "Search query".to_string(), true) + .build(), + ToolBuilder::new("weather".to_string(), "Get weather".to_string()) + .param("city".to_string(), "string".to_string(), "City name".to_string(), true) + .build(), + ]; + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("".to_string()) + .end_tag("".to_string()) + .start_is_special(false) + .end_is_special(false) + .build_json(); + let lark_str = get_lark_from_top_level_grammar(&grammar); + assert!(lark_str.contains("obj_search:"), "Should contain obj_search rule"); + assert!(lark_str.contains("obj_weather:"), "Should contain obj_weather rule"); + } + + #[test] + fn test_tool_grammar_builder_xml_single_tool() { + let tools = vec![ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query".to_string(), "string".to_string(), "Search query".to_string(), true) + .build()]; + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("".to_string()) + .end_tag("".to_string()) + .start_is_special(false) + .end_is_special(false) + .build_xml(); + assert!(grammar.grammars.len() > 0); + } + + #[test] + fn test_tool_grammar_builder_xml_multiple_tools() { + let tools = vec![ + ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query".to_string(), "string".to_string(), "Search query".to_string(), true) + .build(), + ToolBuilder::new("weather".to_string(), "Get weather".to_string()) + .param("city".to_string(), "string".to_string(), "City name".to_string(), true) + .build(), + ]; + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("".to_string()) + .end_tag("".to_string()) + .start_is_special(false) + .end_is_special(false) + .build_xml(); + let lark_str = get_lark_from_top_level_grammar(&grammar); + 
assert!(lark_str.contains("tool_content: tool_0 | tool_1"), "Expected tool alternation"); + } + + #[test] + fn test_tool_grammar_builder_with_token_ids() { + let tools = vec![ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query".to_string(), "string".to_string(), "Search query".to_string(), true) + .build()]; + let mut start_ids = HashSet::new(); + start_ids.insert(151657); + let mut end_ids = HashSet::new(); + end_ids.insert(151658); + + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("".to_string()) + .end_tag("".to_string()) + .start_is_special(false) + .end_is_special(false) + .start_token_ids(Some(start_ids)) + .end_token_ids(Some(end_ids)) + .build_json(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + assert!(lark_str.contains("<[151657]>"), "Should contain start token ID"); + assert!(lark_str.contains("<[151658]>"), "Should contain end token ID"); + } + + #[test] + fn test_tool_grammar_builder_special_tags() { + let tools = vec![ToolBuilder::new("search".to_string(), "Search the web".to_string()) + .param("query".to_string(), "string".to_string(), "Search query".to_string(), true) + .build()]; + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("".to_string()) + .end_tag("".to_string()) + .start_is_special(true) + .end_is_special(true) + .build_json(); + let lark_str = get_lark_from_top_level_grammar(&grammar); + assert!(lark_str.contains(""), "Should contain special start tag"); + assert!(lark_str.contains(""), "Should contain special end tag"); + } + + #[test] + fn test_tool_grammar_builder_empty_tools_json() { + let grammar = ToolGrammarBuilder::new() + .tools(&[]) + .start_tag("".to_string()) + .end_tag("".to_string()) + .start_is_special(false) + .end_is_special(false) + .build_json(); + let lark_str = get_lark_from_top_level_grammar(&grammar); + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + assert!(lark_str.contains("obj: 
%json"), "Should have obj rule with generic schema"); + } + + #[test] + fn test_tool_grammar_builder_empty_tools_xml() { + let grammar = ToolGrammarBuilder::new() + .tools(&[]) + .start_tag("".to_string()) + .end_tag("".to_string()) + .start_is_special(false) + .end_is_special(false) + .build_xml(); + let lark_str = get_lark_from_top_level_grammar(&grammar); + assert!(lark_str.contains("start: tool_call"), "Should have start: tool_call"); + assert!(lark_str.contains("tool_content:"), "Should have tool_content rule"); + } + + #[test] + fn test_tool_grammar_builder_complex_schema() { + let tools = vec![ToolBuilder::new("edit_file".to_string(), "Edit a file".to_string()) + .param("file_path".to_string(), "string".to_string(), "Path to the file".to_string(), true) + .param("old_string".to_string(), "string".to_string(), "String to replace".to_string(), true) + .param("new_string".to_string(), "string".to_string(), "Replacement string".to_string(), true) + .param("max_replacements".to_string(), "integer".to_string(), "Maximum replacements".to_string(), false) + .build()]; + + let grammar = ToolGrammarBuilder::new() + .tools(&tools) + .start_tag("".to_string()) + .end_tag("".to_string()) + .start_is_special(false) + .end_is_special(false) + .build_xml(); + + let lark_str = get_lark_from_top_level_grammar(&grammar); + assert!(lark_str.contains("param_0_0:"), "Should have param_0_0 rule (file_path - required)"); + assert!(lark_str.contains("param_0_1:"), "Should have param_0_1 rule (old_string - required)"); + assert!(lark_str.contains("param_0_2:"), "Should have param_0_2 rule (new_string - required)"); + assert!(lark_str.contains("param_0_3:"), "Should have param_0_3 rule (max_replacements - optional)"); + } } diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 124b63ad..e8e1fe66 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -14,6 +14,7 @@ pub mod image; pub mod kvcache_allocator; pub mod logits_processor; pub mod progress; +pub mod special_tokens; use 
crate::core::GenerationOutput; use crate::models::gemma3::config::Gemma3Config; use crate::utils::config::MoEConfig; @@ -24,7 +25,7 @@ use crate::utils::downloader::ModelPaths; use crate::utils::gguf_helper::{get_gguf_info, GGUFInfo}; use candle_core::utils::{cuda_is_available, metal_is_available}; use candle_core::{DType, Device, Result}; -use config::{Config, EngineConfig, EosTokenId, GenerationConfig, TokenizerConfig}; +use config::{Config, EngineConfig, EosTokenEntry, EosTokenId, GenerationConfig, TokenizerConfig}; use std::collections::HashMap; use std::path::{Path, PathBuf}; use tokenizers::Tokenizer; @@ -852,7 +853,7 @@ pub fn init_config_tokenizer( add_eos_token: Some(eos.is_some()), chat_template: chat_template.clone(), bos_token: bos, - eos_token: eos, + eos_token: eos.map(|e| EosTokenEntry::multiple(vec![e])), }; let archs = config.architectures.as_ref().unwrap(); diff --git a/src/utils/special_tokens.rs b/src/utils/special_tokens.rs new file mode 100644 index 00000000..5b7e7b50 --- /dev/null +++ b/src/utils/special_tokens.rs @@ -0,0 +1,362 @@ +use tokenizers::tokenizer::{Tokenizer, AddedToken}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)] +pub enum Category { + Eos, + Pad, + Bos, + Sep, + Cls, + Mask, + Tool, + Function, + Parameter, + Role, + ContentType, + Reasoning, + Other, +} + +#[derive(Debug, Clone)] +pub enum MatchRule { + Exact(String), + StartsWith(String), + Contains(String), + And(Box, Box), + Or(Box, Box), + Not(Box), +} + +impl MatchRule { + pub fn matches(&self, content: &str) -> bool { + match self { + MatchRule::Exact(s) => s == content, + MatchRule::StartsWith(s) => content.starts_with(s), + MatchRule::Contains(s) => content.contains(s), + MatchRule::And(lhs, rhs) => lhs.matches(content) && rhs.matches(content), + MatchRule::Or(lhs, rhs) => lhs.matches(content) || rhs.matches(content), + MatchRule::Not(inner) => !inner.matches(content), + } + } + + pub fn and(self, other: Self) -> Self { + MatchRule::And(Box::new(self), 
Box::new(other)) + } + + pub fn or(self, other: Self) -> Self { + MatchRule::Or(Box::new(self), Box::new(other)) + } + + pub fn not(self) -> Self { + MatchRule::Not(Box::new(self)) + } +} + +#[derive(Debug, Clone)] +pub struct SpecialTokenMatch { + pub category: Category, + pub id: u32, + pub content: String, +} + + +#[derive(Debug, Clone, Default)] +pub struct SpecialTokens { + eos: Vec<(u32, String)>, + pad: Vec<(u32, String)>, + bos: Vec<(u32, String)>, + sep: Vec<(u32, String)>, + cls: Vec<(u32, String)>, + mask: Vec<(u32, String)>, + tool: Vec<(u32, String)>, + function: Vec<(u32, String)>, + parameter: Vec<(u32, String)>, + role: Vec<(u32, String)>, + content_type: Vec<(u32, String)>, + reasoning: Vec<(u32, String)>, + other: Vec<(u32, String)>, +} + +impl SpecialTokens { + /// Search for tokens by ID or by substring within the token content. + /// + /// # Arguments + /// * `id` - Optional token ID to match exactly. + /// * `substring` - Optional string to search for within the token content (case-sensitive). + /// + /// # Returns + /// A vector of `Match` structs containing the category, id, and string of all matches. 
+ pub fn search(&self, id: Option, substring: Option<&str>) -> Vec { + let mut results = Vec::new(); + + // Helper closure to check a single vector of tokens + let mut check_tokens = |vec: &[(u32, String)], cat: Category| { + for (token_id, content) in vec { + let id_match = match id { + Some(target_id) => *token_id == target_id, + None => true, + }; + + let content_match = match substring { + Some(sub) => content.contains(sub), + None => true, + }; + + if id_match && content_match { + results.push(SpecialTokenMatch { + category: cat, + id: *token_id, + content: content.clone(), + }); + } + } + }; + + // Iterate through all category vectors + check_tokens(&self.eos, Category::Eos); + check_tokens(&self.pad, Category::Pad); + check_tokens(&self.bos, Category::Bos); + check_tokens(&self.sep, Category::Sep); + check_tokens(&self.cls, Category::Cls); + check_tokens(&self.mask, Category::Mask); + check_tokens(&self.tool, Category::Tool); + check_tokens(&self.function, Category::Function); + check_tokens(&self.parameter, Category::Parameter); + check_tokens(&self.role, Category::Role); + check_tokens(&self.content_type, Category::ContentType); + check_tokens(&self.reasoning, Category::Reasoning); + check_tokens(&self.other, Category::Other); + + results + } + + pub fn new(tokenizer: &Tokenizer) -> Self { + let rules = default_rules(); + Self::from_tokenizer_and_rules(tokenizer, &rules) + } + + pub fn new_from_file(tokenizer_path: &str) -> Self { + let tokenizer = Tokenizer::from_file(tokenizer_path).expect("Failed to load tokenizer"); + Self::new(&tokenizer) + } + + pub fn from_tokenizer_and_rules(tokenizer: &Tokenizer, rules: &[(MatchRule, Category)]) -> Self { + let decoder = tokenizer.get_added_tokens_decoder(); + let mut map: std::collections::HashMap> = + std::collections::HashMap::from([ + (Category::Eos, Vec::new()), + (Category::Pad, Vec::new()), + (Category::Bos, Vec::new()), + (Category::Sep, Vec::new()), + (Category::Cls, Vec::new()), + (Category::Mask, 
Vec::new()), + (Category::Tool, Vec::new()), + (Category::Function, Vec::new()), + (Category::Parameter, Vec::new()), + (Category::Role, Vec::new()), + (Category::ContentType, Vec::new()), + (Category::Reasoning, Vec::new()), + (Category::Other, Vec::new()), + ]); + for (id, AddedToken { content, .. }) in decoder { + let token = (id, content.clone()); + + // 1. Find the first matching rule + let category = rules.iter() + .find(|(rule, _)| rule.matches(&content)) + .map(|(_, cat)| *cat) + .unwrap_or(Category::Other); + + // 2. Get the vector for this category + let vec = map.get_mut(&category).unwrap(); + + // 3. Idiomatic Uniqueness Check: Push only if ID is not already present + // We assume uniqueness is based on the Token ID (u32) + if !vec.iter().any(|(existing_id, _)| *existing_id == id) { + vec.push(token); + } + } + Self { + eos: map.remove(&Category::Eos).unwrap_or_default(), + pad: map.remove(&Category::Pad).unwrap_or_default(), + bos: map.remove(&Category::Bos).unwrap_or_default(), + sep: map.remove(&Category::Sep).unwrap_or_default(), + cls: map.remove(&Category::Cls).unwrap_or_default(), + mask: map.remove(&Category::Mask).unwrap_or_default(), + tool: map.remove(&Category::Tool).unwrap_or_default(), + function: map.remove(&Category::Function).unwrap_or_default(), + parameter: map.remove(&Category::Parameter).unwrap_or_default(), + role: map.remove(&Category::Role).unwrap_or_default(), + content_type: map.remove(&Category::ContentType).unwrap_or_default(), + reasoning: map.remove(&Category::Reasoning).unwrap_or_default(), + other: map.remove(&Category::Other).unwrap_or_default(), + } + } + + pub fn eos_tokens(&self) -> &[(u32, String)] { &self.eos } + pub fn eos_ids(&self) -> Vec { self.eos.iter().map(|(id, _)| *id).collect() } + pub fn eos_strings(&self) -> Vec { self.eos.iter().map(|(_, s)| s.clone()).collect() } + + pub fn pad_tokens(&self) -> &[(u32, String)] { &self.pad } + pub fn pad_ids(&self) -> Vec { self.pad.iter().map(|(id, _)| *id).collect() 
} + pub fn pad_strings(&self) -> Vec { self.pad.iter().map(|(_, s)| s.clone()).collect() } + + pub fn bos_tokens(&self) -> &[(u32, String)] { &self.bos } + pub fn bos_ids(&self) -> Vec { self.bos.iter().map(|(id, _)| *id).collect() } + pub fn bos_strings(&self) -> Vec { self.bos.iter().map(|(_, s)| s.clone()).collect() } + + pub fn sep_tokens(&self) -> &[(u32, String)] { &self.sep } + pub fn sep_ids(&self) -> Vec { self.sep.iter().map(|(id, _)| *id).collect() } + pub fn sep_strings(&self) -> Vec { self.sep.iter().map(|(_, s)| s.clone()).collect() } + + pub fn cls_tokens(&self) -> &[(u32, String)] { &self.cls } + pub fn cls_ids(&self) -> Vec { self.cls.iter().map(|(id, _)| *id).collect() } + pub fn cls_strings(&self) -> Vec { self.cls.iter().map(|(_, s)| s.clone()).collect() } + + pub fn mask_tokens(&self) -> &[(u32, String)] { &self.mask } + pub fn mask_ids(&self) -> Vec { self.mask.iter().map(|(id, _)| *id).collect() } + pub fn mask_strings(&self) -> Vec { self.mask.iter().map(|(_, s)| s.clone()).collect() } + + pub fn tool_tokens(&self) -> &[(u32, String)] { &self.tool } + pub fn tool_ids(&self) -> Vec { self.tool.iter().map(|(id, _)| *id).collect() } + pub fn tool_strings(&self) -> Vec { self.tool.iter().map(|(_, s)| s.clone()).collect() } + + pub fn function_tokens(&self) -> &[(u32, String)] { &self.function } + pub fn function_ids(&self) -> Vec { self.function.iter().map(|(id, _)| *id).collect() } + pub fn function_strings(&self) -> Vec { self.function.iter().map(|(_, s)| s.clone()).collect() } + + pub fn parameter_tokens(&self) -> &[(u32, String)] { &self.parameter } + pub fn parameter_ids(&self) -> Vec { self.parameter.iter().map(|(id, _)| *id).collect() } + pub fn parameter_strings(&self) -> Vec { self.parameter.iter().map(|(_, s)| s.clone()).collect() } + + pub fn role_tokens(&self) -> &[(u32, String)] { &self.role } + pub fn role_ids(&self) -> Vec { self.role.iter().map(|(id, _)| *id).collect() } + pub fn role_strings(&self) -> Vec { 
self.role.iter().map(|(_, s)| s.clone()).collect() } + + pub fn content_type_tokens(&self) -> &[(u32, String)] { &self.content_type } + pub fn content_type_ids(&self) -> Vec { self.content_type.iter().map(|(id, _)| *id).collect() } + pub fn content_type_strings(&self) -> Vec { self.content_type.iter().map(|(_, s)| s.clone()).collect() } + + pub fn reasoning_tokens(&self) -> &[(u32, String)] { &self.reasoning } + pub fn reasoning_ids(&self) -> Vec { self.reasoning.iter().map(|(id, _)| *id).collect() } + pub fn reasoning_strings(&self) -> Vec { self.reasoning.iter().map(|(_, s)| s.clone()).collect() } + + pub fn other_tokens(&self) -> &[(u32, String)] { &self.other } + pub fn other_ids(&self) -> Vec { self.other.iter().map(|(id, _)| *id).collect() } + pub fn other_strings(&self) -> Vec { self.other.iter().map(|(_, s)| s.clone()).collect() } +} + +pub fn default_rules() -> Vec<(MatchRule, Category)> { + vec![ + // eos + (MatchRule::Exact("".to_string()), Category::Eos), + (MatchRule::Exact("<|end_of_text|>".to_string()), Category::Eos), + (MatchRule::Exact("<‌|im_end|>".to_string()), Category::Eos), + (MatchRule::Exact("".to_string()), Category::Eos), + (MatchRule::Exact("eos".to_string()), Category::Eos), + (MatchRule::StartsWith("<|end".to_string()), Category::Eos), + (MatchRule::StartsWith("<|eod".to_string()), Category::Eos), + (MatchRule::Contains("end_of".to_string()), Category::Eos), + (MatchRule::Contains("end".to_string()) + .and(MatchRule::Not(Box::new(MatchRule::Contains("tokenizer".to_string())))), + Category::Eos), + + // pad + (MatchRule::Exact("".to_string()), Category::Pad), + (MatchRule::Exact("".to_string()), Category::Pad), + (MatchRule::Exact("pad".to_string()), Category::Pad), + (MatchRule::Exact("<|video_pad|>".to_string()), Category::Pad), + (MatchRule::Exact("<|vision_pad|>".to_string()), Category::Pad), + (MatchRule::Exact("<|fim_pad|>".to_string()), Category::Pad), + (MatchRule::Exact("<|fim_prefix|>".to_string()), Category::Pad), + 
(MatchRule::Exact("<|fim_pad|>".to_string()), Category::Pad), + (MatchRule::Exact("<|fim_suffix|>".to_string()), Category::Pad), + (MatchRule::Exact("<|fim_middle|>".to_string()), Category::Pad), + (MatchRule::Exact("<|image_pad|>".to_string()), Category::Pad), + (MatchRule::StartsWith("".to_string()), Category::Bos), + (MatchRule::Exact("<|start_of_turn|>".to_string()), Category::Bos), + (MatchRule::Exact("<|vision_start|>".to_string()), Category::Bos), + (MatchRule::Exact("<|im_start|>".to_string()), Category::Bos), + (MatchRule::Exact("<|quad_start|>".to_string()), Category::Bos), + (MatchRule::Exact("<|box_start|>".to_string()), Category::Bos), + (MatchRule::Exact("<|vision_start|>".to_string()), Category::Bos), + (MatchRule::Exact("<|start_of_turn|>".to_string()), Category::Bos), + (MatchRule::Exact("<|object_ref_start|>".to_string()), Category::Bos), + (MatchRule::StartsWith("<|start".to_string()), Category::Bos), + (MatchRule::Exact("<|im_start|>".to_string()), Category::Eos), + (MatchRule::StartsWith("".to_string()), Category::Sep), + (MatchRule::Exact("<|separator|>".to_string()), Category::Sep), + (MatchRule::Exact("sep".to_string()), Category::Sep), + (MatchRule::StartsWith("".to_string()), Category::Cls), + (MatchRule::Exact("[CLS]".to_string()), Category::Cls), + (MatchRule::Exact("cls".to_string()), Category::Cls), + (MatchRule::StartsWith("".to_string()), Category::Mask), + (MatchRule::Exact("".to_string()), Category::Mask), + (MatchRule::Exact("[MASK]".to_string()), Category::Mask), + (MatchRule::Exact("mask".to_string()), Category::Mask), + (MatchRule::StartsWith("".to_string()), Category::Tool), + (MatchRule::Exact("<|tool|>".to_string()), Category::Tool), + (MatchRule::StartsWith("".to_string()), Category::Function), + (MatchRule::Exact("<|function|>".to_string()), Category::Function), + (MatchRule::StartsWith("".to_string()), Category::Parameter), + (MatchRule::Exact("<|parameter|>".to_string()), Category::Parameter), + 
(MatchRule::StartsWith("".to_string()), Category::Role), + (MatchRule::Exact("<|role|>".to_string()), Category::Role), + (MatchRule::Exact("<|vision_start|>".to_string()), Category::Role), + (MatchRule::Exact("<|im_start|>".to_string()), Category::Role), + (MatchRule::Exact("<|quad_start|>".to_string()), Category::Role), + (MatchRule::Exact("<|box_start|>".to_string()), Category::Role), + (MatchRule::Exact("<|vision_start|>".to_string()), Category::Role), + (MatchRule::Exact("<|file_sep|>".to_string()), Category::Role), + (MatchRule::Exact("<|im_end|>".to_string()), Category::Role), + (MatchRule::StartsWith("<|role|>".to_string()), Category::Role), + (MatchRule::StartsWith("".to_string()), Category::ContentType), + (MatchRule::Exact("<|content_type|>".to_string()), Category::ContentType), + (MatchRule::StartsWith("".to_string()), Category::Reasoning), + (MatchRule::Exact("".to_string()), Category::Reasoning), + (MatchRule::Exact("".to_string()), Category::Reasoning), + (MatchRule::Exact("".to_string()), Category::Reasoning), + (MatchRule::Exact("".to_string()), Category::Reasoning), + (MatchRule::Exact("<|thinking|>".to_string()), Category::Reasoning), + (MatchRule::Exact("<|reasoning|>".to_string()), Category::Reasoning), + (MatchRule::StartsWith(" Date: Mon, 9 Mar 2026 00:16:16 +0800 Subject: [PATCH 2/2] Support Qwen3.5 Dense models on Metal (#258) --- Cargo.toml | 8 ++-- src/core/engine.rs | 77 +++++++++++++++++++++------------------ src/models/gemma3/mod.rs | 19 +++++++--- src/models/glm4.rs | 15 ++++---- src/models/glm4_moe.rs | 15 ++++---- src/models/llama.rs | 15 ++++---- src/models/phi4.rs | 17 ++++++--- src/models/qwen3.rs | 15 ++++---- src/models/qwen3_5.rs | 15 ++++---- src/models/qwen3_5_moe.rs | 15 ++++---- src/models/qwen3_moe.rs | 15 ++++---- src/runner/runner.rs | 45 +++++++++++++---------- 12 files changed, 150 insertions(+), 121 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index aa2c7ccb..161eaf1f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ 
-11,8 +11,8 @@ categories = ["algorithms", "hardware-support", "science"] license = "MIT" [dependencies] -candle-core = { git = "https://github.com/guoqingbao/candle.git", version = "0.8.3", rev = "1e9d1a9" } -candle-nn = { git = "https://github.com/guoqingbao/candle.git", version = "0.8.3", rev = "1e9d1a9" } +candle-core = { git = "https://github.com/guoqingbao/candle.git", version = "0.8.3", rev = "157b048" } +candle-nn = { git = "https://github.com/guoqingbao/candle.git", version = "0.8.3", rev = "157b048" } serde = { version = "1.0.190", features = ["serde_derive"] } tokenizers = {version = "0.21.2", features = ["http"] } hf-hub = "0.4.1" @@ -46,7 +46,7 @@ ahash = "0.8.11" reedline = "0.40.0" pyo3 = { version = "0.25.1", features = ["extension-module", "abi3-py38"], optional = true } parking_lot = "0.12.4" -attention-rs = { git = "https://github.com/guoqingbao/attention.rs.git", version="0.4.1", rev = "af0b475" } +attention-rs = { git = "https://github.com/guoqingbao/attention.rs.git", version="0.4.1", rev = "29e4beb" } once_cell = "1.21.3" tqdm = "0.8.0" futures = "0.3.31" @@ -62,7 +62,7 @@ utoipa = { version = "4.2", features = ["axum_extras"] } colored = { version = "3.0.0" } tower-http = { version = "0.6.6", features = ["cors"] } rustchatui = { git = "https://github.com/guoqingbao/rustchatui.git", rev = "68caad9" } -sysinfo = "0.37.2" +sysinfo = "0.38.3" image = { version = "0.25.6", default-features = false, features = ['bmp', 'gif', 'jpeg', 'png', 'tiff', 'webp'] } reqwest = { version = "0.12.24", features = ["blocking", "json", "rustls-tls"]} bytemuck = "1.24.0" diff --git a/src/core/engine.rs b/src/core/engine.rs index af629fa6..09152d09 100644 --- a/src/core/engine.rs +++ b/src/core/engine.rs @@ -206,43 +206,48 @@ impl LLMEngine { let reporter: Arc>> = Arc::new(RwLock::new(Box::new(ProgressReporter::new(0)))); let handle = progress_worker(1, config.num_hidden_layers, &reporter); - let vb = VarBuilderX::new(&model_pathes, is_gguf, dtype, &device)?; - 
let transfer = if let Some(p_cfg) = &econfig.pd_config { - Some(Arc::new(Transfer::new( - p_cfg.clone(), - 0, - model_loaded.clone(), - stop_flag.clone(), - )?)) - } else { - None - }; - - let mut model_runner = ModelRunner::new( - model_type.clone(), - &vb, - #[cfg(not(feature = "nccl"))] - Rc::new(Comm::default()), - #[cfg(feature = "nccl")] - Rc::new( - Comm::from_rank( - device.as_cuda_device().unwrap().cuda_device(), + let mut model_runner = { + let _guard = candle_core::InferenceMode::enter(); + let vb = VarBuilderX::new(&model_pathes, is_gguf, dtype, &device)?; + let transfer = if let Some(p_cfg) = &econfig.pd_config { + Some(Arc::new(Transfer::new( + p_cfg.clone(), 0, - 1, - Id::new().unwrap(), - ) - .unwrap(), - ), - &mut econfig, - &config, - dtype, - is_rope_i, - device.clone(), - reporter, - transfer, - llg_factory.clone(), - None, - )?; + model_loaded.clone(), + stop_flag.clone(), + )?)) + } else { + None + }; + + let runner = ModelRunner::new( + model_type.clone(), + &vb, + #[cfg(not(feature = "nccl"))] + Rc::new(Comm::default()), + #[cfg(feature = "nccl")] + Rc::new( + Comm::from_rank( + device.as_cuda_device().unwrap().cuda_device(), + 0, + 1, + Id::new().unwrap(), + ) + .unwrap(), + ), + &mut econfig, + &config, + dtype, + is_rope_i, + device.clone(), + reporter, + transfer, + llg_factory.clone(), + None, + )?; + drop(vb); + runner + }; if !is_pd_server { //No graph capture for PD server diff --git a/src/models/gemma3/mod.rs b/src/models/gemma3/mod.rs index 68e435c2..600aafd6 100644 --- a/src/models/gemma3/mod.rs +++ b/src/models/gemma3/mod.rs @@ -538,11 +538,7 @@ impl Gemma3ForConditionalGeneration { } else { vb.pp("language_model.model.embed_tokens") }, - if is_qvar_builder || g_cfg.quant.is_some() { - DType::F32 - } else { - dtype - }, + dtype, )?; let embed_scale = (config.text_config.hidden_size as f64).sqrt(); @@ -659,6 +655,17 @@ impl Gemma3ForConditionalGeneration { }) } + fn embed_forward(&self, input_ids: &Tensor) -> Result { + let xs = 
self.embed_tokens.forward(input_ids)?; + let xs = if (self.is_qvar_builder || self.g_cfg.quant.is_some()) && xs.dtype() != DType::F32 + { + xs.to_dtype(DType::F32)? + } else { + xs + }; + xs * self.embed_scale + } + fn vision_tower( &self, image_features: &Tensor, @@ -687,7 +694,7 @@ impl Gemma3ForConditionalGeneration { ) -> Result { let text_cfg = &self.config.text_config; // 1. Prepare Text Embeddings (Scaled) - let mut xs = (self.embed_tokens.forward(input_ids)? * self.embed_scale)?; + let mut xs = self.embed_forward(input_ids)?; // vision projection and embedding if let Some(images) = images { diff --git a/src/models/glm4.rs b/src/models/glm4.rs index bdce6101..33ea6608 100644 --- a/src/models/glm4.rs +++ b/src/models/glm4.rs @@ -206,11 +206,7 @@ impl GLM4ForCausalLM { } else { vb.pp("model.embed_tokens") }, - if is_qvar_builder || config.quant.is_some() { - DType::F32 - } else { - dtype - }, + dtype, )?; let rotary_emb = Arc::new(ScalingRotaryEmbedding::new( if is_qvar_builder || config.quant.is_some() { @@ -293,7 +289,12 @@ impl GLM4ForCausalLM { } pub fn embed_forward(&self, xs: &Tensor) -> Result { - self.embed_tokens.forward(xs) + let xs = self.embed_tokens.forward(xs)?; + if (self.is_qvar_builder || self.config.quant.is_some()) && xs.dtype() != DType::F32 { + xs.to_dtype(DType::F32) + } else { + Ok(xs) + } } fn forward_inner( @@ -319,7 +320,7 @@ impl GLM4ForCausalLM { let mut xs = if embeded_inputs { input_ids.to_owned() } else { - self.embed_tokens.forward(input_ids)? + self.embed_forward(input_ids)? 
}; if let Some(kv_caches) = kv_caches { diff --git a/src/models/glm4_moe.rs b/src/models/glm4_moe.rs index 18c7c75b..b2d1855c 100644 --- a/src/models/glm4_moe.rs +++ b/src/models/glm4_moe.rs @@ -305,11 +305,7 @@ impl GLM4MoEForCausalLM { } else { vb.pp(&format!("{}embed_tokens", prefix)) }, - if is_qvar_builder || config.quant.is_some() { - DType::F32 - } else { - dtype - }, + dtype, )?; let rotary_emb = Arc::new(ScalingRotaryEmbedding::new( if is_qvar_builder || config.quant.is_some() { @@ -393,7 +389,12 @@ impl GLM4MoEForCausalLM { } pub fn embed_forward(&self, xs: &Tensor) -> Result { - self.embed_tokens.forward(xs) + let xs = self.embed_tokens.forward(xs)?; + if (self.is_qvar_builder || self.config.quant.is_some()) && xs.dtype() != DType::F32 { + xs.to_dtype(DType::F32) + } else { + Ok(xs) + } } fn forward_inner( @@ -420,7 +421,7 @@ impl GLM4MoEForCausalLM { let mut xs = if embeded_inputs { input_ids.to_owned() } else { - self.embed_tokens.forward(input_ids)? + self.embed_forward(input_ids)? }; if let Some(kv_caches) = kv_caches { diff --git a/src/models/llama.rs b/src/models/llama.rs index 2bf3e408..450512d4 100644 --- a/src/models/llama.rs +++ b/src/models/llama.rs @@ -173,11 +173,7 @@ impl LLaMaForCausalLM { } else { vb.pp("model.embed_tokens").clone() }, - if is_qvar_builder || config.quant.is_some() { - DType::F32 - } else { - dtype - }, + dtype, )?; let rotary_emb = Arc::new(ScalingRotaryEmbedding::new( @@ -262,7 +258,12 @@ impl LLaMaForCausalLM { } pub fn embed_forward(&self, xs: &Tensor) -> Result { - self.embed_tokens.forward(xs) + let xs = self.embed_tokens.forward(xs)?; + if (self.is_qvar_builder || self.config.quant.is_some()) && xs.dtype() != DType::F32 { + xs.to_dtype(DType::F32) + } else { + Ok(xs) + } } fn forward_inner( @@ -287,7 +288,7 @@ impl LLaMaForCausalLM { let mut xs = if embeded_inputs { input_ids.to_owned() } else { - self.embed_tokens.forward(input_ids)? + self.embed_forward(input_ids)? 
}; if let Some(kv_caches) = kv_caches { diff --git a/src/models/phi4.rs b/src/models/phi4.rs index 05fe2759..7444eb08 100644 --- a/src/models/phi4.rs +++ b/src/models/phi4.rs @@ -513,11 +513,7 @@ impl Phi4ForCausalLM { } else { vb.pp("model.embed_tokens") }, - if is_qvar_builder || config.quant.is_some() { - DType::F32 - } else { - dtype - }, + dtype, )?; let rotary_emb = Arc::new(Phi4RotaryEmbedding::new( if is_qvar_builder || config.quant.is_some() { @@ -595,6 +591,15 @@ impl Phi4ForCausalLM { }) } + pub fn embed_forward(&self, xs: &Tensor) -> Result { + let xs = self.embed_tokens.forward(xs)?; + if (self.is_qvar_builder || self.config.quant.is_some()) && xs.dtype() != DType::F32 { + xs.to_dtype(DType::F32) + } else { + Ok(xs) + } + } + fn forward_inner( &self, input_ids: &Tensor, @@ -620,7 +625,7 @@ impl Phi4ForCausalLM { let mut xs = if embeded_inputs { input_ids.to_owned() } else { - self.embed_tokens.forward(input_ids)? + self.embed_forward(input_ids)? }; if let Some(kv_caches) = kv_caches { diff --git a/src/models/qwen3.rs b/src/models/qwen3.rs index 8d7f1bb0..6d653a42 100644 --- a/src/models/qwen3.rs +++ b/src/models/qwen3.rs @@ -214,11 +214,7 @@ impl Qwen3ForCausalLM { } else { vb.pp(&format!("{}embed_tokens", prefix)) }, - if is_qvar_builder || config.quant.is_some() { - DType::F32 - } else { - dtype - }, + dtype, )?; let rotary_emb = Arc::new(ScalingRotaryEmbedding::new( if is_qvar_builder || config.quant.is_some() { @@ -301,7 +297,12 @@ impl Qwen3ForCausalLM { } pub fn embed_forward(&self, xs: &Tensor) -> Result { - self.embed_tokens.forward(xs) + let xs = self.embed_tokens.forward(xs)?; + if (self.is_qvar_builder || self.config.quant.is_some()) && xs.dtype() != DType::F32 { + xs.to_dtype(DType::F32) + } else { + Ok(xs) + } } fn forward_inner( @@ -328,7 +329,7 @@ impl Qwen3ForCausalLM { let mut xs = if embeded_inputs { input_ids.to_owned() } else { - self.embed_tokens.forward(input_ids)? + self.embed_forward(input_ids)? 
}; if let Some(kv_caches) = kv_caches { for ((k_cache, v_cache), (i, layer)) in diff --git a/src/models/qwen3_5.rs b/src/models/qwen3_5.rs index d7a339d5..b22dffd1 100644 --- a/src/models/qwen3_5.rs +++ b/src/models/qwen3_5.rs @@ -328,11 +328,7 @@ impl Qwen3_5ForCausalLM { } else { vb.pp(&format!("{}embed_tokens", prefix)) }, - if is_qvar_builder || config.quant.is_some() { - DType::F32 - } else { - dtype - }, + dtype, )?; let rotary_emb = Arc::new(ScalingRotaryEmbedding::new( @@ -475,7 +471,12 @@ impl Qwen3_5ForCausalLM { } pub fn embed_forward(&self, xs: &Tensor) -> Result { - self.embed_tokens.forward(xs) + let xs = self.embed_tokens.forward(xs)?; + if (self.is_qvar_builder || self.config.quant.is_some()) && xs.dtype() != DType::F32 { + xs.to_dtype(DType::F32) + } else { + Ok(xs) + } } fn forward_inner( @@ -503,7 +504,7 @@ impl Qwen3_5ForCausalLM { let mut xs = if embeded_inputs { input_ids.to_owned() } else { - self.embed_tokens.forward(input_ids)? + self.embed_forward(input_ids)? }; let mut kv_cache_idx = 0usize; diff --git a/src/models/qwen3_5_moe.rs b/src/models/qwen3_5_moe.rs index a014fb65..529a9caf 100644 --- a/src/models/qwen3_5_moe.rs +++ b/src/models/qwen3_5_moe.rs @@ -441,11 +441,7 @@ impl Qwen3_5MoEForCausalLM { } else { vb.pp(&format!("{}embed_tokens", prefix)) }, - if is_qvar_builder || config.quant.is_some() { - DType::F32 - } else { - dtype - }, + dtype, )?; let rotary_emb = Arc::new(ScalingRotaryEmbedding::new( @@ -586,7 +582,12 @@ impl Qwen3_5MoEForCausalLM { } pub fn embed_forward(&self, xs: &Tensor) -> Result { - self.embed_tokens.forward(xs) + let xs = self.embed_tokens.forward(xs)?; + if (self.is_qvar_builder || self.config.quant.is_some()) && xs.dtype() != DType::F32 { + xs.to_dtype(DType::F32) + } else { + Ok(xs) + } } fn forward_inner( @@ -613,7 +614,7 @@ impl Qwen3_5MoEForCausalLM { let mut xs = if embeded_inputs { input_ids.to_owned() } else { - self.embed_tokens.forward(input_ids)? + self.embed_forward(input_ids)? 
}; let mut kv_cache_idx = 0usize; diff --git a/src/models/qwen3_moe.rs b/src/models/qwen3_moe.rs index 6ad153bc..52e30187 100644 --- a/src/models/qwen3_moe.rs +++ b/src/models/qwen3_moe.rs @@ -348,11 +348,7 @@ impl Qwen3MoEForCausalLM { } else { vb.pp(&format!("{}embed_tokens", prefix)) }, - if is_qvar_builder || config.quant.is_some() { - DType::F32 - } else { - dtype - }, + dtype, )?; let rotary_emb = Arc::new(ScalingRotaryEmbedding::new( if is_qvar_builder || config.quant.is_some() { @@ -436,7 +432,12 @@ impl Qwen3MoEForCausalLM { } pub fn embed_forward(&self, xs: &Tensor) -> Result { - self.embed_tokens.forward(xs) + let xs = self.embed_tokens.forward(xs)?; + if (self.is_qvar_builder || self.config.quant.is_some()) && xs.dtype() != DType::F32 { + xs.to_dtype(DType::F32) + } else { + Ok(xs) + } } fn forward_inner( @@ -463,7 +464,7 @@ impl Qwen3MoEForCausalLM { let mut xs = if embeded_inputs { input_ids.to_owned() } else { - self.embed_tokens.forward(input_ids)? + self.embed_forward(input_ids)? 
}; if let Some(kv_caches) = kv_caches { diff --git a/src/runner/runner.rs b/src/runner/runner.rs index 470507d1..f6c3b1c3 100644 --- a/src/runner/runner.rs +++ b/src/runner/runner.rs @@ -127,12 +127,6 @@ fn main() -> anyhow::Result<()> { (None, false) }; - let vb = VarBuilderX::new( - &init_req.model_pathes, - init_req.is_gguf, - init_req.dtype.into(), - &device, - )?; let stream_kv = Some(stream.try_clone()?); let mut econfig = init_req.econfig.clone(); let tokenizer = Tokenizer::from_file(init_req.model_pathes.get_tokenizer_filename()) @@ -145,20 +139,31 @@ fn main() -> anyhow::Result<()> { } }; #[allow(unused_mut)] - let mut runner = ModelRunner::new( - init_req.model_type, - &vb, - comm, - &mut econfig, - &init_req.config, - init_req.dtype.into(), - init_req.is_rope_i, - device, - progress_reporter, - transfer, - llg_factory, - stream_kv, - )?; + let mut runner = { + let _guard = candle_core::InferenceMode::enter(); + let vb = VarBuilderX::new( + &init_req.model_pathes, + init_req.is_gguf, + init_req.dtype.into(), + &device, + )?; + let runner = ModelRunner::new( + init_req.model_type, + &vb, + comm, + &mut econfig, + &init_req.config, + init_req.dtype.into(), + init_req.is_rope_i, + device, + progress_reporter, + transfer, + llg_factory, + stream_kv, + )?; + drop(vb); + runner + }; vllm_rs::log_info!( "Runner at rank {} created (PD config: {:?})!",