From e8ec7566763c977580d5ee201a8a1079326037b8 Mon Sep 17 00:00:00 2001
From: Luke Hinds
Date: Sun, 16 Feb 2025 08:22:44 +0000
Subject: [PATCH 1/2] Lag Effect

---
 README.md               | 80 ++++++++++++-----------------------------
 example.responses.yml   |  6 +++-
 pyproject.toml          |  2 +-
 src/mockllm/__init__.py |  2 +-
 src/mockllm/config.py   | 41 ++++++++++++++++++++-
 src/mockllm/server.py   | 15 ++++----
 6 files changed, 79 insertions(+), 67 deletions(-)

diff --git a/README.md b/README.md
index ed63898..722c37c 100644
--- a/README.md
+++ b/README.md
@@ -4,8 +4,7 @@
 [![PyPI version](https://badge.fury.io/py/mockllm.svg)](https://badge.fury.io/py/mockllm)
 [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
 
-A FastAPI-based mock LLM server that mimics OpenAI and Anthropic API formats. Instead of calling actual language models,
-it uses predefined responses from a YAML configuration file.
+A FastAPI-based mock LLM server that mimics OpenAI and Anthropic API formats. Instead of calling actual language models, it uses predefined responses from a YAML configuration file.
 
 This is made for when you want a deterministic response for testing or development purposes.
 
@@ -17,8 +16,6 @@ Check out the [CodeGate](https://github.com/stacklok/codegate) project when you'
 - Streaming support (character-by-character response streaming)
 - Configurable responses via YAML file
 - Hot-reloading of response configurations
-- JSON logging
-- Error handling
 - Mock token counting
 
 ## Installation
@@ -128,10 +125,11 @@ curl -X POST http://localhost:8000/v1/messages \
 
 ### Response Configuration
 
-Responses are configured in `responses.yml`. The file has two main sections:
+Responses are configured in `responses.yml`. The file has three main sections:
 
 1. `responses`: Maps input prompts to predefined responses
 2. `defaults`: Contains default configurations like the unknown response message
+3. `settings`: Contains server behavior settings like network lag simulation
 
 Example `responses.yml`:
 ```yaml
@@ -141,71 +139,39 @@ responses:
 
 defaults:
   unknown_response: "I don't know the answer to that. This is a mock response."
-```
-
-### Hot Reloading
-
-The server automatically detects changes to `responses.yml` and reloads the configuration without requiring a restart.
-
-## Development
-
-The project uses Poetry for dependency management and includes a Makefile to help with common development tasks:
-
-```bash
-# Set up development environment
-make setup
-
-# Run all checks (setup, lint, test)
-make all
-# Run tests
-make test
-
-# Format code
-make format
-
-# Run all linting and type checking
-make lint
-
-# Clean up build artifacts
-make clean
-
-# See all available commands
-make help
+settings:
+  lag_enabled: true
+  lag_factor: 10 # Higher values = faster responses (10 = fast, 1 = slow)
 ```
 
-### Development Commands
+### Network Lag Simulation
 
-- `make setup`: Install all development dependencies with Poetry
-- `make test`: Run the test suite
-- `make format`: Format code with black and isort
-- `make lint`: Run all code quality checks (format, lint, type)
-- `make build`: Build the package with Poetry
-- `make clean`: Remove build artifacts and cache files
-- `make install-dev`: Install package with development dependencies
+The server can simulate network latency for more realistic testing scenarios. This is controlled by two settings:
 
-For more details on available commands, run `make help`.
+- `lag_enabled`: When true, enables artificial network lag
+- `lag_factor`: Controls the speed of responses
+  - Higher values (e.g., 10) result in faster responses
+  - Lower values (e.g., 1) result in slower responses
+  - Affects both streaming and non-streaming responses
 
-## Error Handling
+For streaming responses, the lag is applied per-character with slight random variations to simulate realistic network conditions.
 
-The server includes comprehensive error handling:
-
-- Invalid requests return 400 status codes with descriptive messages
-- Server errors return 500 status codes with error details
-- All errors are logged using JSON format
+### Hot Reloading
 
-## Logging
+The server automatically detects changes to `responses.yml` and reloads the configuration without restarting the server.
 
-The server uses JSON-formatted logging for:
+## Testing
 
-- Incoming request details
-- Response configuration loading
-- Error messages and stack traces
+To run the tests:
+```bash
+poetry run pytest
+```
 
 ## Contributing
 
-Contributions are welcome! Please feel free to submit a Pull Request.
+Contributions are welcome! Please open an issue or submit a PR.
 
 ## License
 
-This project is licensed under the Apache License, Version 2.0 - see the [LICENSE](LICENSE) file for details.
+This project is licensed under the [Apache 2.0 License](LICENSE).
 
diff --git a/example.responses.yml b/example.responses.yml
index 841229b..4018c4b 100644
--- a/example.responses.yml
+++ b/example.responses.yml
@@ -5,4 +5,8 @@
   "what is the meaning of life?": "According to this mock response, the meaning of life is to write better mock servers."
 
 defaults:
-  unknown_response: "I don't know the answer to that. This is a mock response."
\ No newline at end of file
+  unknown_response: "I don't know the answer to that. This is a mock response."
+
+settings:
+  lag_enabled: true
+  lag_factor: 10 # Higher values = faster responses (10 = fast, 1 = slow)
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 58db09b..be36b84 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "mockllm"
-version = "0.0.6"
+version = "0.0.7"
 description = "A mock server that mimics OpenAI and Anthropic API formats for testing"
 authors = ["Luke Hinds "]
 license = "Apache-2.0"
diff --git a/src/mockllm/__init__.py b/src/mockllm/__init__.py
index 92e0200..40af696 100644
--- a/src/mockllm/__init__.py
+++ b/src/mockllm/__init__.py
@@ -2,4 +2,4 @@
 Mock LLM Server - You will do what I tell you!
 """
 
-__version__ = "0.0.6"
+__version__ = "0.0.7"
diff --git a/src/mockllm/config.py b/src/mockllm/config.py
index 1d504ec..69749fc 100644
--- a/src/mockllm/config.py
+++ b/src/mockllm/config.py
@@ -1,6 +1,8 @@
 import logging
+import asyncio
+import random
 from pathlib import Path
-from typing import Dict, Generator, Optional
+from typing import Dict, Generator, Optional, AsyncGenerator
 
 import yaml
 from fastapi import HTTPException
@@ -20,6 +22,8 @@ def __init__(self, yaml_path: str = "responses.yml"):
         self.last_modified = 0
         self.responses: Dict[str, str] = {}
         self.default_response = "I don't know the answer to that."
+        self.lag_enabled = False
+        self.lag_factor = 10
         self.load_responses()
 
     def load_responses(self) -> None:
@@ -33,6 +37,9 @@ def load_responses(self) -> None:
                 self.default_response = data.get("defaults", {}).get(
                     "unknown_response", self.default_response
                 )
+                settings = data.get("settings", {})
+                self.lag_enabled = settings.get("lag_enabled", False)
+                self.lag_factor = settings.get("lag_factor", 10)
                 self.last_modified = int(current_mtime)
                 logger.info(
                     f"Loaded {len(self.responses)} responses from {self.yaml_path}"
                 )
@@ -62,3 +69,35 @@ def get_streaming_response(
         # Yield response character by character
         for char in response:
             yield char
+
+    async def get_response_with_lag(self, prompt: str) -> str:
+        """Get response with artificial lag for non-streaming responses."""
+        response = self.get_response(prompt)
+        if self.lag_enabled:
+            # Base delay on response length and lag factor
+            delay = len(response) / (self.lag_factor * 10)
+            await asyncio.sleep(delay)
+        return response
+
+    async def get_streaming_response_with_lag(
+        self, prompt: str, chunk_size: Optional[int] = None
+    ) -> AsyncGenerator[str, None]:
+        """Generator that yields response content with artificial lag."""
+        response = self.get_response(prompt)
+
+        if chunk_size:
+            for i in range(0, len(response), chunk_size):
+                chunk = response[i : i + chunk_size]
+                if self.lag_enabled:
+                    delay = len(chunk) / (self.lag_factor * 10)
+                    await asyncio.sleep(delay)
+                yield chunk
+        else:
+            for char in response:
+                if self.lag_enabled:
+                    # Add random variation to character delay
+                    base_delay = 1 / (self.lag_factor * 10)
+                    variation = random.uniform(-0.5, 0.5) * base_delay
+                    delay = max(0, base_delay + variation)
+                    await asyncio.sleep(delay)
+                yield char
diff --git a/src/mockllm/server.py b/src/mockllm/server.py
index 3822d68..769d123 100644
--- a/src/mockllm/server.py
+++ b/src/mockllm/server.py
@@ -30,15 +30,14 @@
 async def openai_stream_response(content: str, model: str) -> AsyncGenerator[str, None]:
     """Generate OpenAI-style streaming response in SSE format."""
-    # Send the first message with role
     first_chunk = OpenAIStreamResponse(
         model=model,
         choices=[OpenAIStreamChoice(delta=OpenAIDeltaMessage(role="assistant"))],
     )
     yield f"data: {first_chunk.model_dump_json()}\n\n"
 
-    # Stream the content character by character
-    for chunk in response_config.get_streaming_response(content):
+    # Stream the content character by character with lag
+    async for chunk in response_config.get_streaming_response_with_lag(content):
         chunk_response = OpenAIStreamResponse(
             model=model,
             choices=[OpenAIStreamChoice(delta=OpenAIDeltaMessage(content=chunk))],
         )
@@ -58,7 +57,7 @@ async def anthropic_stream_response(
     content: str, model: str
 ) -> AsyncGenerator[str, None]:
     """Generate Anthropic-style streaming response in SSE format."""
-    for chunk in response_config.get_streaming_response(content):
+    async for chunk in response_config.get_streaming_response_with_lag(content):
         stream_response = AnthropicStreamResponse(
             delta=AnthropicStreamDelta(delta={"text": chunk})
         )
@@ -98,7 +97,9 @@ async def openai_chat_completion(
             media_type="text/event-stream",
         )
 
-    response_content = response_config.get_response(last_message.content)
+    response_content = await response_config.get_response_with_lag(
+        last_message.content
+    )
 
     # Calculate mock token counts
     prompt_tokens = len(str(request.messages).split())
@@ -159,7 +160,9 @@ async def anthropic_chat_completion(
             media_type="text/event-stream",
         )
 
-    response_content = response_config.get_response(last_message.content)
+    response_content = await response_config.get_response_with_lag(
+        last_message.content
+    )
 
     # Calculate mock token counts
     prompt_tokens = len(str(request.messages).split())

From d086179f82c35b06abe73b3d77f451e29ee326e2 Mon Sep 17 00:00:00 2001
From: Luke Hinds
Date: Sun, 16 Feb 2025 08:24:22 +0000
Subject: [PATCH 2/2] Organise imports

---
 src/mockllm/config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mockllm/config.py b/src/mockllm/config.py
index 69749fc..9f58895 100644
--- a/src/mockllm/config.py
+++ b/src/mockllm/config.py
@@ -1,8 +1,8 @@
-import logging
 import asyncio
+import logging
 import random
 from pathlib import Path
-from typing import Dict, Generator, Optional, AsyncGenerator
+from typing import AsyncGenerator, Dict, Generator, Optional
 
 import yaml
 from fastapi import HTTPException
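
As a rough, standalone illustration (not part of either patch above), the sketch below re-derives the delay model these commits add to `src/mockllm/config.py`: a non-streaming response sleeps once for `len(response) / (lag_factor * 10)` seconds, while a streaming response sleeps per character for `1 / (lag_factor * 10)` seconds with a random ±50% variation. The helper names and the sample reply are assumptions made only for this example.

```python
# Illustrative only: approximates the lag model added in src/mockllm/config.py.
# The helper names and the sample reply below are assumptions, not project code.
import random
from typing import List


def non_streaming_delay(response: str, lag_factor: int) -> float:
    """Single sleep applied before a non-streaming response is returned."""
    return len(response) / (lag_factor * 10)


def streaming_char_delays(response: str, lag_factor: int) -> List[float]:
    """Per-character sleeps used while streaming, with +/-50% random variation."""
    base_delay = 1 / (lag_factor * 10)
    return [
        max(0, base_delay + random.uniform(-0.5, 0.5) * base_delay) for _ in response
    ]


if __name__ == "__main__":
    reply = "According to this mock response, the meaning of life is to write better mock servers."
    for lag_factor in (1, 10):
        total_stream = sum(streaming_char_delays(reply, lag_factor))
        print(
            f"lag_factor={lag_factor}: "
            f"non-streaming ~{non_streaming_delay(reply, lag_factor):.2f}s, "
            f"streaming ~{total_stream:.2f}s total"
        )
```

With the default shown in `example.responses.yml` (`lag_factor: 10`), that roughly 85-character reply is delayed by a little under a second in either mode; dropping `lag_factor` to 1 stretches it to around eight and a half seconds.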