From b4dbfaf28b26745bb4ca46189f6064e617e6ef43 Mon Sep 17 00:00:00 2001 From: Luke Hinds Date: Fri, 14 Feb 2025 14:12:21 +0000 Subject: [PATCH 1/2] Setup project --- LICENSE | 201 +++++++++++++++++++++++++++++++++++++++++++++++++ Makefile | 73 ++++++++++++++++++ README.md | 155 ++++++++++++-------------------------- pyproject.toml | 79 +++++++++++++++++++ 4 files changed, 402 insertions(+), 106 deletions(-) create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 pyproject.toml diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..2d78330 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, +and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by +the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all +other entities that control, are controlled by, or are under common +control with that entity. For the purposes of this definition, +"control" means (i) the power, direct or indirect, to cause the +direction or management of such entity, whether by contract or +otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity +exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, +including but not limited to software source code, documentation +source, and configuration files. + +"Object" form shall mean any form resulting from mechanical +transformation or translation of a Source form, including but +not limited to compiled object code, generated documentation, +and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or +Object form, made available under the License, as indicated by a +copyright notice that is included in or attached to the work +(an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object +form, that is based on (or derived from) the Work and for which the +editorial revisions, annotations, elaborations, or other modifications +represent, as a whole, an original work of authorship. For the purposes +of this License, Derivative Works shall not include works that remain +separable from, or merely link (or bind by name) to the interfaces of, +the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including +the original version of the Work and any modifications or additions +to that Work or Derivative Works thereof, that is intentionally +submitted to Licensor for inclusion in the Work by the copyright owner +or by an individual or Legal Entity authorized to submit on behalf of +the copyright owner. 
For the purposes of this definition, "submitted" +means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, +and issue tracking systems that are managed by, or on behalf of, the +Licensor for the purpose of discussing and improving the Work, but +excluding communication that is conspicuously marked or otherwise +designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity +on behalf of whom a Contribution has been received by Licensor and +subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of +this License, each Contributor hereby grants to You a perpetual, +worldwide, non-exclusive, no-charge, royalty-free, irrevocable +copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the +Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of +this License, each Contributor hereby grants to You a perpetual, +worldwide, non-exclusive, no-charge, royalty-free, irrevocable +(except as stated in this section) patent license to make, have made, +use, offer to sell, sell, import, and otherwise transfer the Work, +where such license applies only to those patent claims licensable +by such Contributor that are necessarily infringed by their +Contribution(s) alone or by combination of their Contribution(s) +with the Work to which such Contribution(s) was submitted. If You +institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work +or a Contribution incorporated within the Work constitutes direct +or contributory patent infringement, then any patent licenses +granted to You under this License for that Work shall terminate +as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the +Work or Derivative Works thereof in any medium, with or without +modifications, and in Source or Object form, provided that You +meet the following conditions: + +(a) You must give any other recipients of the Work or +Derivative Works a copy of this License; and + +(b) You must cause any modified files to carry prominent notices +stating that You changed the files; and + +(c) You must retain, in the Source form of any Derivative Works +that You distribute, all copyright, patent, trademark, and +attribution notices from the Source form of the Work, +excluding those notices that do not pertain to any part of +the Derivative Works; and + +(d) If the Work includes a "NOTICE" text file as part of its +distribution, then any Derivative Works that You distribute must +include a readable copy of the attribution notices contained +within such NOTICE file, excluding those notices that do not +pertain to any part of the Derivative Works, in at least one +of the following places: within a NOTICE text file distributed +as part of the Derivative Works; within the Source form or +documentation, if provided along with the Derivative Works; or, +within a display generated by the Derivative Works, if and +wherever such third-party notices normally appear. The contents +of the NOTICE file are for informational purposes only and +do not modify the License. 
You may add Your own attribution +notices within Derivative Works that You distribute, alongside +or as an addendum to the NOTICE text from the Work, provided +that such additional attribution notices cannot be construed +as modifying the License. + +You may add Your own copyright statement to Your modifications and +may provide additional or different license terms and conditions +for use, reproduction, or distribution of Your modifications, or +for any such Derivative Works as a whole, provided Your use, +reproduction, and distribution of the Work otherwise complies with +the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, +any Contribution intentionally submitted for inclusion in the Work +by You to the Licensor shall be under the terms and conditions of +this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify +the terms of any separate license agreement you may have executed +with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade +names, trademarks, service marks, or product names of the Licensor, +except as required for reasonable and customary use in describing the +origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or +agreed to in writing, Licensor provides the Work (and each +Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +implied, including, without limitation, any warranties or conditions +of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +PARTICULAR PURPOSE. You are solely responsible for determining the +appropriateness of using or redistributing the Work and assume any +risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, +whether in tort (including negligence), contract, or otherwise, +unless required by applicable law (such as deliberate and grossly +negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, +incidental, or consequential damages of any character arising as a +result of this License or out of the use or inability to use the +Work (including but not limited to damages for loss of goodwill, +work stoppage, computer failure or malfunction, or any and all +other commercial damages or losses), even if such Contributor +has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing +the Work or Derivative Works thereof, You may choose to offer, +and charge a fee for, acceptance of support, warranty, indemnity, +or other liability obligations and/or rights consistent with this +License. However, in accepting such obligations, You may act only +on Your own behalf and on Your sole responsibility, not on behalf +of any other Contributor, and only if You agree to indemnify, +defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason +of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. 
+
+To apply the Apache License to your work, attach the following
+boilerplate notice, with the fields enclosed by brackets "[]"
+replaced with your own identifying information. (Don't include
+the brackets!) The text should be enclosed in the appropriate
+comment syntax for the file format. We also recommend that a
+file or class name and description of purpose be included on the
+same "printed page" as the copyright notice for easier
+identification within third-party archives.
+
+Copyright 2025 Stacklok, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f66bede
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,73 @@
+.PHONY: all setup test lint format format-check lint-check type-check clean build install install-dev help
+
+# Default target
+all: setup lint test
+
+# Setup development environment
+setup:
+	python -m pip install --upgrade pip
+	pip install -e ".[dev]"
+
+# Run tests
+test:
+	pytest tests/ -v
+
+# Run all linting and type checking
+lint: format-check lint-check type-check
+
+# Format code
+format:
+	black .
+	isort .
+
+# Check formatting
+format-check:
+	black --check .
+	isort --check .
+
+# Run linting
+lint-check:
+	ruff check .
+
+# Run type checking
+type-check:
+	mypy src/
+
+# Clean up
+clean:
+	rm -rf build/
+	rm -rf dist/
+	rm -rf *.egg-info
+	rm -rf .pytest_cache
+	rm -rf .mypy_cache
+	rm -rf .ruff_cache
+	find . -type d -name __pycache__ -exec rm -rf {} +
+	find . -type f -name "*.pyc" -delete
+
+# Build package
+build: clean
+	python -m build
+
+# Install package locally
+install:
+	pip install -e .
+
+# Install development dependencies
+install-dev:
+	pip install -e ".[dev]"
+
+# Help target
+help:
+	@echo "Available targets:"
+	@echo "  all          : Run setup, lint, and test"
+	@echo "  setup        : Set up development environment"
+	@echo "  test         : Run tests"
+	@echo "  lint         : Run all code quality checks"
+	@echo "  format       : Format code with black and isort"
+	@echo "  format-check : Check code formatting"
+	@echo "  lint-check   : Run ruff linter"
+	@echo "  type-check   : Run mypy type checker"
+	@echo "  clean        : Clean up build artifacts"
+	@echo "  build        : Build package"
+	@echo "  install      : Install package locally"
+	@echo "  install-dev  : Install package with development dependencies"
\ No newline at end of file
diff --git a/README.md b/README.md
index 9bf0aaf..a2624f4 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,15 @@
 # Mock LLM Server
 
-A FastAPI-based mock LLM server that mimics OpenAI and Anthropic API formats. Instead of calling actual language models,
+[![CI](https://github.com/stacklok/mockllm/actions/workflows/ci.yml/badge.svg)](https://github.com/stacklok/mockllm/actions/workflows/ci.yml)
+[![PyPI version](https://badge.fury.io/py/mockllm.svg)](https://badge.fury.io/py/mockllm)
+[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+
+A FastAPI-based mock LLM server that mimics OpenAI and Anthropic API formats. Instead of calling actual language models,
 it uses predefined responses from a YAML configuration file.
 
 This is made for when you want a deterministic response for testing or development purposes.
 
-Check out the [CodeGate](https://github.com/stacklok/codegate) when you're done here!.
+Check out [CodeGate](https://github.com/stacklok/codegate) when you're done here!
 
 ## Features
 
@@ -18,12 +21,19 @@ Check out the [CodeGate](https://github.com/stacklok/codegate) when you're done
 - Error handling
 - Mock token counting
 
-
 ## Installation
 
+### From PyPI
+
+```bash
+pip install mockllm
+```
+
+### From Source
+
 1. Clone the repository:
 ```bash
-git clone https://github.com/lukehinds/mockllm.git
+git clone https://github.com/stacklok/mockllm.git
 cd mockllm
 ```
 
@@ -35,7 +45,9 @@ source venv/bin/activate  # On Windows, use: venv\Scripts\activate
 
 3. Install dependencies:
 ```bash
-pip install -r requirements.txt
+pip install -e ".[dev]"  # Install with development dependencies
+# or
+pip install -e .  # Install without development dependencies
 ```
 
 ## Usage
 
@@ -48,11 +60,11 @@ cp example.responses.yml responses.yml
 
 2. Start the server:
 ```bash
-python main.py
+python -m mockllm
 ```
 
 Or using uvicorn directly:
 ```bash
-uvicorn src.mockllm.server:app --reload
+uvicorn mockllm.server:app --reload
 ```
 
 The server will start on `http://localhost:8000`
 
@@ -136,118 +148,49 @@ defaults:
 
 The server automatically detects changes to `responses.yml` and reloads the configuration without requiring a restart.
 
-## API Format
-
-### OpenAI Format
-
-#### Request Format
-
-```json
-{
-  "model": "mock-llm",
-  "messages": [
-    {"role": "user", "content": "what colour is the sky?"}
-  ],
-  "temperature": 0.7,
-  "max_tokens": 150,
-  "stream": false
-}
-```
+## Development
 
-#### Response Format
-
-Regular response:
-```json
-{
-  "id": "mock-123",
-  "object": "chat.completion",
-  "created": 1700000000,
-  "model": "mock-llm",
-  "choices": [
-    {
-      "message": {
-        "role": "assistant",
-        "content": "The sky is blue during a clear day due to a phenomenon called Rayleigh scattering."
-      },
-      "finish_reason": "stop"
-    }
-  ],
-  "usage": {
-    "prompt_tokens": 10,
-    "completion_tokens": 5,
-    "total_tokens": 15
-  }
-}
-```
+The project includes a Makefile to help with common development tasks:
 
-Streaming response (Server-Sent Events format):
-```
-data: {"id":"mock-123","object":"chat.completion.chunk","created":1700000000,"model":"mock-llm","choices":[{"delta":{"role":"assistant"},"index":0}]}
+```bash
+# Set up development environment
+make setup
 
-data: {"id":"mock-124","object":"chat.completion.chunk","created":1700000000,"model":"mock-llm","choices":[{"delta":{"content":"T"},"index":0}]}
+# Run all checks (setup, lint, test)
+make all
 
-data: {"id":"mock-125","object":"chat.completion.chunk","created":1700000000,"model":"mock-llm","choices":[{"delta":{"content":"h"},"index":0}]}
+# Run tests
+make test
 
-...
(character by character) +# Format code +make format -data: {"id":"mock-999","object":"chat.completion.chunk","created":1700000000,"model":"mock-llm","choices":[{"delta":{},"index":0,"finish_reason":"stop"}]} +# Run all linting and type checking +make lint -data: [DONE] -``` +# Clean up build artifacts +make clean -### Anthropic Format - -#### Request Format - -```json -{ - "model": "claude-3-sonnet-20240229", - "messages": [ - {"role": "user", "content": "what colour is the sky?"} - ], - "max_tokens": 1024, - "stream": false -} +# See all available commands +make help ``` -#### Response Format - -Regular response: -```json -{ - "id": "mock-123", - "type": "message", - "role": "assistant", - "model": "claude-3-sonnet-20240229", - "content": [ - { - "type": "text", - "text": "The sky is blue during a clear day due to a phenomenon called Rayleigh scattering." - } - ], - "usage": { - "input_tokens": 10, - "output_tokens": 5, - "total_tokens": 15 - } -} -``` +### Development Commands -Streaming response (Server-Sent Events format): -``` -data: {"type":"message_delta","id":"mock-123","delta":{"type":"content_block_delta","index":0,"delta":{"text":"T"}}} +- `make setup`: Install all development dependencies +- `make test`: Run the test suite +- `make format`: Format code with black and isort +- `make lint`: Run all code quality checks (format, lint, type) +- `make build`: Build the package +- `make clean`: Remove build artifacts and cache files +- `make install-dev`: Install package with development dependencies -data: {"type":"message_delta","id":"mock-123","delta":{"type":"content_block_delta","index":0,"delta":{"text":"h"}}} +For more details on available commands, run `make help`. -... (character by character) - -data: [DONE] -``` +## Contributing -## Error Handling +Contributions are welcome! Please feel free to submit a Pull Request. -The server includes comprehensive error handling: +## License -- Invalid requests return 400 status codes with descriptive messages -- Server errors return 500 status codes with error details -- All errors are logged using JSON format +This project is licensed under the Apache License, Version 2.0 - see the [LICENSE](LICENSE) file for details. 
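The patch doesn't include `example.responses.yml` itself, but the schema is implied by `ResponseConfig` in `src/mockllm/config.py` (reformatted in the second commit below): a top-level `responses` mapping keyed by the lowercased, stripped user prompt, plus a `defaults.unknown_response` fallback. A minimal hand-written sketch of that shape (the prompt/response pair is illustrative, not taken from the real example file):

```yaml
# Hypothetical responses.yml -- keys are matched against the lowercased,
# stripped user prompt; anything unmatched falls back to
# defaults.unknown_response.
responses:
  "what colour is the sky?": "The sky is blue during a clear day due to a phenomenon called Rayleigh scattering."

defaults:
  unknown_response: "I don't have a response configured for that prompt."
```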
diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..2abde4e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,79 @@ +[build-system] +requires = ["setuptools>=45", "wheel", "setuptools_scm>=6.2"] +build-backend = "setuptools.build_meta" + +[project] +name = "mockllm" +dynamic = ["version"] +description = "A mock server that mimics OpenAI and Anthropic API formats for testing" +readme = "README.md" +requires-python = ">=3.8" +license = {text = "Apache-2.0"} +keywords = ["mock", "llm", "openai", "anthropic", "testing"] +authors = [ + {name = "Luke Hinds", email = "luke@stacklok.com"} +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: Software Development :: Testing", +] +dependencies = [ + "fastapi>=0.68.0", + "uvicorn>=0.15.0", + "pydantic>=2.0.0", + "python-json-logger>=2.0.0", + "pyyaml>=5.4.1", + "watchdog>=2.1.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=6.0", + "pytest-asyncio>=0.15.0", + "black>=21.0", + "isort>=5.0", + "mypy>=0.900", + "ruff>=0.1.0", +] + +[project.urls] +Homepage = "https://github.com/stacklok/mockllm" +Repository = "https://github.com/stacklok/mockllm.git" +Issues = "https://github.com/stacklok/mockllm/issues" + +[tool.setuptools] +packages = ["mockllm"] +package-dir = {"" = "src"} + +[tool.setuptools_scm] +write_to = "src/mockllm/_version.py" + +[tool.black] +line-length = 88 +target-version = ["py38"] +include = '\.pyi?$' + +[tool.isort] +profile = "black" +multi_line_output = 3 +line_length = 88 + +[tool.mypy] +python_version = "3.8" +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = true +check_untyped_defs = true + +[tool.ruff] +line-length = 88 +target-version = "py38" +select = ["E", "F", "B", "I"] +ignore = [] \ No newline at end of file From b803170eca49a87541f0d648ee38b348a1da8d9d Mon Sep 17 00:00:00 2001 From: Luke Hinds Date: Fri, 14 Feb 2025 14:12:53 +0000 Subject: [PATCH 2/2] Formatting --- main.py | 2 +- src/mockllm/__init__.py | 2 +- src/mockllm/_version.py | 17 ++++++ src/mockllm/config.py | 24 +++++--- src/mockllm/models.py | 27 ++++++++- src/mockllm/server.py | 131 ++++++++++++++++++++-------------------- 6 files changed, 123 insertions(+), 80 deletions(-) create mode 100644 src/mockllm/_version.py diff --git a/main.py b/main.py index 6968ba6..3d4740a 100644 --- a/main.py +++ b/main.py @@ -3,4 +3,4 @@ from src.mockllm.server import app if __name__ == "__main__": - uvicorn.run(app, host="0.0.0.0", port=8000, reload=True) \ No newline at end of file + uvicorn.run(app, host="0.0.0.0", port=8000, reload=True) diff --git a/src/mockllm/__init__.py b/src/mockllm/__init__.py index 39a6a59..d5a7792 100644 --- a/src/mockllm/__init__.py +++ b/src/mockllm/__init__.py @@ -2,4 +2,4 @@ Mock LLM Server - You will do what I tell you! """ -__version__ = "0.1.0" \ No newline at end of file +__version__ = "0.1.0" diff --git a/src/mockllm/_version.py b/src/mockllm/_version.py new file mode 100644 index 0000000..9bd6f7c --- /dev/null +++ b/src/mockllm/_version.py @@ -0,0 +1,17 @@ +# file generated by setuptools_scm +# don't change, don't track in version control +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Tuple, Union + + VERSION_TUPLE = Tuple[Union[int, str], ...] 
+else: + VERSION_TUPLE = object + +version: str +__version__: str +__version_tuple__: VERSION_TUPLE +version_tuple: VERSION_TUPLE + +__version__ = version = "0.1.dev13+gb4dbfaf" +__version_tuple__ = version_tuple = (0, 1, "dev13", "gb4dbfaf") diff --git a/src/mockllm/config.py b/src/mockllm/config.py index 4943726..83646d1 100644 --- a/src/mockllm/config.py +++ b/src/mockllm/config.py @@ -11,6 +11,7 @@ logging.basicConfig(level=logging.INFO, handlers=[log_handler]) logger = logging.getLogger(__name__) + class ResponseConfig: """Handles loading and managing response configurations from YAML.""" @@ -26,19 +27,20 @@ def load_responses(self) -> None: try: current_mtime = Path(self.yaml_path).stat().st_mtime if current_mtime > self.last_modified: - with open(self.yaml_path, 'r') as f: + with open(self.yaml_path, "r") as f: data = yaml.safe_load(f) - self.responses = data.get('responses', {}) - self.default_response = data.get('defaults', {}).get( - 'unknown_response', self.default_response + self.responses = data.get("responses", {}) + self.default_response = data.get("defaults", {}).get( + "unknown_response", self.default_response ) self.last_modified = current_mtime - logger.info(f"Loaded {len(self.responses)} responses from {self.yaml_path}") + logger.info( + f"Loaded {len(self.responses)} responses from {self.yaml_path}" + ) except Exception as e: logger.error(f"Error loading responses: {str(e)}") raise HTTPException( - status_code=500, - detail="Failed to load response configuration" + status_code=500, detail="Failed to load response configuration" ) def get_response(self, prompt: str) -> str: @@ -46,14 +48,16 @@ def get_response(self, prompt: str) -> str: self.load_responses() # Check for updates return self.responses.get(prompt.lower().strip(), self.default_response) - def get_streaming_response(self, prompt: str, chunk_size: Optional[int] = None) -> str: + def get_streaming_response( + self, prompt: str, chunk_size: Optional[int] = None + ) -> str: """Generator that yields response content character by character or in chunks.""" response = self.get_response(prompt) if chunk_size: # Yield response in chunks for i in range(0, len(response), chunk_size): - yield response[i:i + chunk_size] + yield response[i : i + chunk_size] else: # Yield response character by character for char in response: - yield char \ No newline at end of file + yield char diff --git a/src/mockllm/models.py b/src/mockllm/models.py index ae3f62e..ef2f671 100644 --- a/src/mockllm/models.py +++ b/src/mockllm/models.py @@ -1,42 +1,54 @@ import time import uuid -from typing import Dict, List, Optional, Literal +from typing import Dict, List, Literal, Optional from pydantic import BaseModel, Field + # OpenAI Models class OpenAIMessage(BaseModel): """OpenAI chat message model.""" + role: str content: str + class OpenAIChatRequest(BaseModel): """OpenAI chat completion request model.""" + model: str messages: List[OpenAIMessage] temperature: Optional[float] = Field(default=0.7) max_tokens: Optional[int] = Field(default=150) stream: Optional[bool] = Field(default=False) + class OpenAIDeltaMessage(BaseModel): """OpenAI streaming delta message model.""" + role: Optional[str] = None content: Optional[str] = None + class OpenAIStreamChoice(BaseModel): """OpenAI streaming choice model.""" + delta: OpenAIDeltaMessage index: int = 0 finish_reason: Optional[str] = None + class OpenAIChatChoice(BaseModel): """OpenAI regular chat choice model.""" + message: OpenAIMessage index: int = 0 finish_reason: str = "stop" + class 
OpenAIChatResponse(BaseModel): """OpenAI chat completion response model.""" + id: str = Field(default_factory=lambda: f"mock-{uuid.uuid4()}") object: str = "chat.completion" created: int = Field(default_factory=lambda: int(time.time())) @@ -44,30 +56,38 @@ class OpenAIChatResponse(BaseModel): choices: List[Dict] usage: Dict[str, int] + class OpenAIStreamResponse(BaseModel): """OpenAI streaming response model.""" + id: str = Field(default_factory=lambda: f"mock-{uuid.uuid4()}") object: str = "chat.completion.chunk" created: int = Field(default_factory=lambda: int(time.time())) model: str choices: List[OpenAIStreamChoice] + # Anthropic Models class AnthropicMessage(BaseModel): """Anthropic message model.""" + role: Literal["user", "assistant"] content: str + class AnthropicChatRequest(BaseModel): """Anthropic chat completion request model.""" + model: str max_tokens: Optional[int] = Field(default=1024) messages: List[AnthropicMessage] stream: Optional[bool] = Field(default=False) temperature: Optional[float] = Field(default=1.0) + class AnthropicChatResponse(BaseModel): """Anthropic chat completion response model.""" + id: str = Field(default_factory=lambda: f"mock-{uuid.uuid4()}") type: str = "message" role: str = "assistant" @@ -77,19 +97,24 @@ class AnthropicChatResponse(BaseModel): stop_sequence: Optional[str] = None usage: Dict[str, int] + class AnthropicStreamDelta(BaseModel): """Anthropic streaming delta model.""" + type: str = "content_block_delta" index: int = 0 delta: Dict[str, str] + class AnthropicStreamResponse(BaseModel): """Anthropic streaming response model.""" + type: str = "message_delta" id: str = Field(default_factory=lambda: f"mock-{uuid.uuid4()}") delta: AnthropicStreamDelta usage: Optional[Dict[str, int]] = None + # For backward compatibility Message = OpenAIMessage ChatRequest = OpenAIChatRequest diff --git a/src/mockllm/server.py b/src/mockllm/server.py index aa90469..b0d641d 100644 --- a/src/mockllm/server.py +++ b/src/mockllm/server.py @@ -6,10 +6,17 @@ from pythonjsonlogger import jsonlogger from .config import ResponseConfig -from .models import (OpenAIChatRequest, OpenAIChatResponse, OpenAIDeltaMessage, - OpenAIStreamChoice, OpenAIStreamResponse, - AnthropicChatRequest, AnthropicChatResponse, - AnthropicStreamResponse, AnthropicStreamDelta) +from .models import ( + AnthropicChatRequest, + AnthropicChatResponse, + AnthropicStreamDelta, + AnthropicStreamResponse, + OpenAIChatRequest, + OpenAIChatResponse, + OpenAIDeltaMessage, + OpenAIStreamChoice, + OpenAIStreamResponse, +) log_handler = logging.StreamHandler() log_handler.setFormatter(jsonlogger.JsonFormatter()) @@ -20,16 +27,13 @@ response_config = ResponseConfig() + async def openai_stream_response(content: str, model: str) -> AsyncGenerator[str, None]: """Generate OpenAI-style streaming response in SSE format.""" # Send the first message with role first_chunk = OpenAIStreamResponse( model=model, - choices=[ - OpenAIStreamChoice( - delta=OpenAIDeltaMessage(role="assistant") - ) - ] + choices=[OpenAIStreamChoice(delta=OpenAIDeltaMessage(role="assistant"))], ) yield f"data: {first_chunk.model_dump_json()}\n\n" @@ -37,64 +41,60 @@ async def openai_stream_response(content: str, model: str) -> AsyncGenerator[str for chunk in response_config.get_streaming_response(content): chunk_response = OpenAIStreamResponse( model=model, - choices=[ - OpenAIStreamChoice( - delta=OpenAIDeltaMessage(content=chunk) - ) - ] + choices=[OpenAIStreamChoice(delta=OpenAIDeltaMessage(content=chunk))], ) yield f"data: 
{chunk_response.model_dump_json()}\n\n" # Send the final message final_chunk = OpenAIStreamResponse( model=model, - choices=[ - OpenAIStreamChoice( - delta=OpenAIDeltaMessage(), - finish_reason="stop" - ) - ] + choices=[OpenAIStreamChoice(delta=OpenAIDeltaMessage(), finish_reason="stop")], ) yield f"data: {final_chunk.model_dump_json()}\n\n" yield "data: [DONE]\n\n" -async def anthropic_stream_response(content: str, model: str) -> AsyncGenerator[str, None]: + +async def anthropic_stream_response( + content: str, model: str +) -> AsyncGenerator[str, None]: """Generate Anthropic-style streaming response in SSE format.""" for chunk in response_config.get_streaming_response(content): stream_response = AnthropicStreamResponse( - delta=AnthropicStreamDelta( - delta={"text": chunk} - ) + delta=AnthropicStreamDelta(delta={"text": chunk}) ) yield f"data: {stream_response.model_dump_json()}\n\n" - + yield "data: [DONE]\n\n" + @app.post("/v1/chat/completions", response_model=None) -async def openai_chat_completion(request: OpenAIChatRequest) -> Union[OpenAIChatResponse, StreamingResponse]: +async def openai_chat_completion( + request: OpenAIChatRequest, +) -> Union[OpenAIChatResponse, StreamingResponse]: """Handle chat completion requests, supporting both regular and streaming responses.""" try: - logger.info("Received chat completion request", extra={ - "model": request.model, - "message_count": len(request.messages), - "stream": request.stream - }) + logger.info( + "Received chat completion request", + extra={ + "model": request.model, + "message_count": len(request.messages), + "stream": request.stream, + }, + ) last_message = next( - (msg for msg in reversed(request.messages) if msg.role == "user"), - None + (msg for msg in reversed(request.messages) if msg.role == "user"), None ) if not last_message: raise HTTPException( - status_code=400, - detail="No user message found in request" + status_code=400, detail="No user message found in request" ) if request.stream: return StreamingResponse( openai_stream_response(last_message.content, request.model), - media_type="text/event-stream" + media_type="text/event-stream", ) response_content = response_config.get_response(last_message.content) @@ -106,53 +106,53 @@ async def openai_chat_completion(request: OpenAIChatRequest) -> Union[OpenAIChat return OpenAIChatResponse( model=request.model, - choices=[{ - "index": 0, - "message": { - "role": "assistant", - "content": response_content - }, - "finish_reason": "stop" - }], + choices=[ + { + "index": 0, + "message": {"role": "assistant", "content": response_content}, + "finish_reason": "stop", + } + ], usage={ "prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens, - "total_tokens": total_tokens - } + "total_tokens": total_tokens, + }, ) except Exception as e: logger.error(f"Error processing request: {str(e)}") - raise HTTPException( - status_code=500, - detail=f"Internal server error: {str(e)}" - ) + raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") + @app.post("/v1/messages", response_model=None) -async def anthropic_chat_completion(request: AnthropicChatRequest) -> Union[AnthropicChatResponse, StreamingResponse]: +async def anthropic_chat_completion( + request: AnthropicChatRequest, +) -> Union[AnthropicChatResponse, StreamingResponse]: """Handle Anthropic chat completion requests, supporting both regular and streaming responses.""" try: - logger.info("Received Anthropic chat completion request", extra={ - "model": request.model, - "message_count": 
len(request.messages), - "stream": request.stream - }) + logger.info( + "Received Anthropic chat completion request", + extra={ + "model": request.model, + "message_count": len(request.messages), + "stream": request.stream, + }, + ) last_message = next( - (msg for msg in reversed(request.messages) if msg.role == "user"), - None + (msg for msg in reversed(request.messages) if msg.role == "user"), None ) if not last_message: raise HTTPException( - status_code=400, - detail="No user message found in request" + status_code=400, detail="No user message found in request" ) if request.stream: return StreamingResponse( anthropic_stream_response(last_message.content, request.model), - media_type="text/event-stream" + media_type="text/event-stream", ) response_content = response_config.get_response(last_message.content) @@ -168,13 +168,10 @@ async def anthropic_chat_completion(request: AnthropicChatRequest) -> Union[Anth usage={ "input_tokens": prompt_tokens, "output_tokens": completion_tokens, - "total_tokens": total_tokens - } + "total_tokens": total_tokens, + }, ) except Exception as e: logger.error(f"Error processing request: {str(e)}") - raise HTTPException( - status_code=500, - detail=f"Internal server error: {str(e)}" - ) + raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
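End-to-end, the two commits leave the server exposing `/v1/chat/completions` (OpenAI-style) and `/v1/messages` (Anthropic-style). A minimal smoke-test sketch against a locally running instance, assuming the `requests` package is installed and the prompt below is configured in `responses.yml`:

```python
# Hypothetical client-side check -- not part of the patch. Start the server
# first (python -m mockllm), then run this script.
import requests

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "mock-llm",
        "messages": [{"role": "user", "content": "what colour is the sky?"}],
        "stream": False,
    },
    timeout=10,
)
resp.raise_for_status()
body = resp.json()

# The mock always answers from responses.yml and reports fabricated usage.
print(body["choices"][0]["message"]["content"])
print(body["usage"])
```

Setting `"stream": true` in the payload instead returns a `text/event-stream` response terminated by `data: [DONE]`, matching the streaming handlers above.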