From b4dbfaf28b26745bb4ca46189f6064e617e6ef43 Mon Sep 17 00:00:00 2001 From: Luke Hinds Date: Fri, 14 Feb 2025 14:12:21 +0000 Subject: [PATCH 1/2] Setup project --- LICENSE | 201 +++++++++++++++++++++++++++++++++++++++++++++++++ Makefile | 73 ++++++++++++++++++ README.md | 155 ++++++++++++-------------------------- pyproject.toml | 79 +++++++++++++++++++ 4 files changed, 402 insertions(+), 106 deletions(-) create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 pyproject.toml diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..2d78330 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, +and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by +the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all +other entities that control, are controlled by, or are under common +control with that entity. For the purposes of this definition, +"control" means (i) the power, direct or indirect, to cause the +direction or management of such entity, whether by contract or +otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity +exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, +including but not limited to software source code, documentation +source, and configuration files. + +"Object" form shall mean any form resulting from mechanical +transformation or translation of a Source form, including but +not limited to compiled object code, generated documentation, +and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or +Object form, made available under the License, as indicated by a +copyright notice that is included in or attached to the work +(an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object +form, that is based on (or derived from) the Work and for which the +editorial revisions, annotations, elaborations, or other modifications +represent, as a whole, an original work of authorship. For the purposes +of this License, Derivative Works shall not include works that remain +separable from, or merely link (or bind by name) to the interfaces of, +the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including +the original version of the Work and any modifications or additions +to that Work or Derivative Works thereof, that is intentionally +submitted to Licensor for inclusion in the Work by the copyright owner +or by an individual or Legal Entity authorized to submit on behalf of +the copyright owner. 
For the purposes of this definition, "submitted" +means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, +and issue tracking systems that are managed by, or on behalf of, the +Licensor for the purpose of discussing and improving the Work, but +excluding communication that is conspicuously marked or otherwise +designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity +on behalf of whom a Contribution has been received by Licensor and +subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of +this License, each Contributor hereby grants to You a perpetual, +worldwide, non-exclusive, no-charge, royalty-free, irrevocable +copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the +Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of +this License, each Contributor hereby grants to You a perpetual, +worldwide, non-exclusive, no-charge, royalty-free, irrevocable +(except as stated in this section) patent license to make, have made, +use, offer to sell, sell, import, and otherwise transfer the Work, +where such license applies only to those patent claims licensable +by such Contributor that are necessarily infringed by their +Contribution(s) alone or by combination of their Contribution(s) +with the Work to which such Contribution(s) was submitted. If You +institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work +or a Contribution incorporated within the Work constitutes direct +or contributory patent infringement, then any patent licenses +granted to You under this License for that Work shall terminate +as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the +Work or Derivative Works thereof in any medium, with or without +modifications, and in Source or Object form, provided that You +meet the following conditions: + +(a) You must give any other recipients of the Work or +Derivative Works a copy of this License; and + +(b) You must cause any modified files to carry prominent notices +stating that You changed the files; and + +(c) You must retain, in the Source form of any Derivative Works +that You distribute, all copyright, patent, trademark, and +attribution notices from the Source form of the Work, +excluding those notices that do not pertain to any part of +the Derivative Works; and + +(d) If the Work includes a "NOTICE" text file as part of its +distribution, then any Derivative Works that You distribute must +include a readable copy of the attribution notices contained +within such NOTICE file, excluding those notices that do not +pertain to any part of the Derivative Works, in at least one +of the following places: within a NOTICE text file distributed +as part of the Derivative Works; within the Source form or +documentation, if provided along with the Derivative Works; or, +within a display generated by the Derivative Works, if and +wherever such third-party notices normally appear. The contents +of the NOTICE file are for informational purposes only and +do not modify the License. 
You may add Your own attribution +notices within Derivative Works that You distribute, alongside +or as an addendum to the NOTICE text from the Work, provided +that such additional attribution notices cannot be construed +as modifying the License. + +You may add Your own copyright statement to Your modifications and +may provide additional or different license terms and conditions +for use, reproduction, or distribution of Your modifications, or +for any such Derivative Works as a whole, provided Your use, +reproduction, and distribution of the Work otherwise complies with +the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, +any Contribution intentionally submitted for inclusion in the Work +by You to the Licensor shall be under the terms and conditions of +this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify +the terms of any separate license agreement you may have executed +with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade +names, trademarks, service marks, or product names of the Licensor, +except as required for reasonable and customary use in describing the +origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or +agreed to in writing, Licensor provides the Work (and each +Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +implied, including, without limitation, any warranties or conditions +of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +PARTICULAR PURPOSE. You are solely responsible for determining the +appropriateness of using or redistributing the Work and assume any +risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, +whether in tort (including negligence), contract, or otherwise, +unless required by applicable law (such as deliberate and grossly +negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, +incidental, or consequential damages of any character arising as a +result of this License or out of the use or inability to use the +Work (including but not limited to damages for loss of goodwill, +work stoppage, computer failure or malfunction, or any and all +other commercial damages or losses), even if such Contributor +has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing +the Work or Derivative Works thereof, You may choose to offer, +and charge a fee for, acceptance of support, warranty, indemnity, +or other liability obligations and/or rights consistent with this +License. However, in accepting such obligations, You may act only +on Your own behalf and on Your sole responsibility, not on behalf +of any other Contributor, and only if You agree to indemnify, +defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason +of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. 
+
+To apply the Apache License to your work, attach the following
+boilerplate notice, with the fields enclosed by brackets "[]"
+replaced with your own identifying information. (Don't include
+the brackets!) The text should be enclosed in the appropriate
+comment syntax for the file format. We also recommend that a
+file or class name and description of purpose be included on the
+same "printed page" as the copyright notice for easier
+identification within third-party archives.
+
+Copyright 2025 Stacklok, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f66bede
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,73 @@
+.PHONY: all setup test lint format format-check lint-check type-check clean build install install-dev help
+
+# Default target
+all: setup lint test
+
+# Setup development environment
+setup:
+	python -m pip install --upgrade pip
+	pip install -e ".[dev]"
+
+# Run tests
+test:
+	pytest tests/ -v
+
+# Run all linting and type checking
+lint: format-check lint-check type-check
+
+# Format code
+format:
+	black .
+	isort .
+
+# Check formatting
+format-check:
+	black --check .
+	isort --check .
+
+# Run linting
+lint-check:
+	ruff check .
+
+# Run type checking
+type-check:
+	mypy src/
+
+# Clean up
+clean:
+	rm -rf build/
+	rm -rf dist/
+	rm -rf *.egg-info
+	rm -rf .pytest_cache
+	rm -rf .mypy_cache
+	rm -rf .ruff_cache
+	find . -type d -name __pycache__ -exec rm -rf {} +
+	find . -type f -name "*.pyc" -delete
+
+# Build package
+build: clean
+	python -m build
+
+# Install package locally
+install:
+	pip install -e .
+
+# Install development dependencies
+install-dev:
+	pip install -e ".[dev]"
+
+# Help target
+help:
+	@echo "Available targets:"
+	@echo "  all          : Run setup, lint, and test"
+	@echo "  setup        : Set up development environment"
+	@echo "  test         : Run tests"
+	@echo "  lint         : Run all code quality checks"
+	@echo "  format       : Format code with black and isort"
+	@echo "  format-check : Check code formatting"
+	@echo "  lint-check   : Run ruff linter"
+	@echo "  type-check   : Run mypy type checker"
+	@echo "  clean        : Clean up build artifacts"
+	@echo "  build        : Build package"
+	@echo "  install      : Install package locally"
+	@echo "  install-dev  : Install package with development dependencies"
\ No newline at end of file
diff --git a/README.md b/README.md
index 9bf0aaf..a2624f4 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,15 @@
 # Mock LLM Server
 
-A FastAPI-based mock LLM server that mimics OpenAI and Anthropic API formats. Instead of calling actual language models,
+[![CI](https://github.com/stacklok/mockllm/actions/workflows/ci.yml/badge.svg)](https://github.com/stacklok/mockllm/actions/workflows/ci.yml)
+[![PyPI version](https://badge.fury.io/py/mockllm.svg)](https://badge.fury.io/py/mockllm)
+[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+
+A FastAPI-based mock LLM server that mimics OpenAI and Anthropic API formats. Instead of calling actual language models,
 it uses predefined responses from a YAML configuration file.
 
 This is made for when you want a deterministic response for testing or development purposes.
 
-Check out the [CodeGate](https://github.com/stacklok/codegate) when you're done here!.
+Check out [CodeGate](https://github.com/stacklok/codegate) when you're done here!
 
 ## Features
 
@@ -18,12 +21,19 @@ Check out the [CodeGate](https://github.com/stacklok/codegate) when you're done
 - Error handling
 - Mock token counting
 
-
 ## Installation
 
+### From PyPI
+
+```bash
+pip install mockllm
+```
+
+### From Source
+
 1. Clone the repository:
 ```bash
-git clone https://github.com/lukehinds/mockllm.git
+git clone https://github.com/stacklok/mockllm.git
 cd mockllm
 ```
 
@@ -35,7 +45,9 @@ source venv/bin/activate  # On Windows, use: venv\Scripts\activate
 
 3. Install dependencies:
 ```bash
-pip install -r requirements.txt
+pip install -e ".[dev]"  # Install with development dependencies
+# or
+pip install -e .  # Install without development dependencies
 ```
 
 ## Usage
 
@@ -48,11 +60,11 @@ cp example.responses.yml responses.yml
 
 2. Start the server:
 ```bash
-python main.py
+python -m mockllm
 ```
 
 Or using uvicorn directly:
 ```bash
-uvicorn src.mockllm.server:app --reload
+uvicorn mockllm.server:app --reload
 ```
 
 The server will start on `http://localhost:8000`
 
@@ -136,118 +148,49 @@ defaults:
 
 The server automatically detects changes to `responses.yml` and reloads the configuration without requiring a restart.
 
-## API Format
-
-### OpenAI Format
-
-#### Request Format
-
-```json
-{
-  "model": "mock-llm",
-  "messages": [
-    {"role": "user", "content": "what colour is the sky?"}
-  ],
-  "temperature": 0.7,
-  "max_tokens": 150,
-  "stream": false
-}
-```
+## Development
 
-#### Response Format
-
-Regular response:
-```json
-{
-  "id": "mock-123",
-  "object": "chat.completion",
-  "created": 1700000000,
-  "model": "mock-llm",
-  "choices": [
-    {
-      "message": {
-        "role": "assistant",
-        "content": "The sky is blue during a clear day due to a phenomenon called Rayleigh scattering."
-      },
-      "finish_reason": "stop"
-    }
-  ],
-  "usage": {
-    "prompt_tokens": 10,
-    "completion_tokens": 5,
-    "total_tokens": 15
-  }
-}
-```
+The project includes a Makefile to help with common development tasks:
 
-Streaming response (Server-Sent Events format):
-```
-data: {"id":"mock-123","object":"chat.completion.chunk","created":1700000000,"model":"mock-llm","choices":[{"delta":{"role":"assistant"},"index":0}]}
+```bash
+# Set up development environment
+make setup
 
-data: {"id":"mock-124","object":"chat.completion.chunk","created":1700000000,"model":"mock-llm","choices":[{"delta":{"content":"T"},"index":0}]}
+# Run all checks (setup, lint, test)
+make all
 
-data: {"id":"mock-125","object":"chat.completion.chunk","created":1700000000,"model":"mock-llm","choices":[{"delta":{"content":"h"},"index":0}]}
+# Run tests
+make test
 
-...
(character by character) +# Format code +make format -data: {"id":"mock-999","object":"chat.completion.chunk","created":1700000000,"model":"mock-llm","choices":[{"delta":{},"index":0,"finish_reason":"stop"}]} +# Run all linting and type checking +make lint -data: [DONE] -``` +# Clean up build artifacts +make clean -### Anthropic Format - -#### Request Format - -```json -{ - "model": "claude-3-sonnet-20240229", - "messages": [ - {"role": "user", "content": "what colour is the sky?"} - ], - "max_tokens": 1024, - "stream": false -} +# See all available commands +make help ``` -#### Response Format - -Regular response: -```json -{ - "id": "mock-123", - "type": "message", - "role": "assistant", - "model": "claude-3-sonnet-20240229", - "content": [ - { - "type": "text", - "text": "The sky is blue during a clear day due to a phenomenon called Rayleigh scattering." - } - ], - "usage": { - "input_tokens": 10, - "output_tokens": 5, - "total_tokens": 15 - } -} -``` +### Development Commands -Streaming response (Server-Sent Events format): -``` -data: {"type":"message_delta","id":"mock-123","delta":{"type":"content_block_delta","index":0,"delta":{"text":"T"}}} +- `make setup`: Install all development dependencies +- `make test`: Run the test suite +- `make format`: Format code with black and isort +- `make lint`: Run all code quality checks (format, lint, type) +- `make build`: Build the package +- `make clean`: Remove build artifacts and cache files +- `make install-dev`: Install package with development dependencies -data: {"type":"message_delta","id":"mock-123","delta":{"type":"content_block_delta","index":0,"delta":{"text":"h"}}} +For more details on available commands, run `make help`. -... (character by character) - -data: [DONE] -``` +## Contributing -## Error Handling +Contributions are welcome! Please feel free to submit a Pull Request. -The server includes comprehensive error handling: +## License -- Invalid requests return 400 status codes with descriptive messages -- Server errors return 500 status codes with error details -- All errors are logged using JSON format +This project is licensed under the Apache License, Version 2.0 - see the [LICENSE](LICENSE) file for details. 
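The patch doesn't include `example.responses.yml` itself, but the schema is implied by `ResponseConfig` in `src/mockllm/config.py` (reformatted in the second commit below): a top-level `responses` mapping keyed by the lowercased, stripped user prompt, plus a `defaults.unknown_response` fallback. A minimal hand-written sketch of that shape (the prompt/response pair is illustrative, not taken from the real example file):

```yaml
# Hypothetical responses.yml -- keys are matched against the lowercased,
# stripped user prompt; anything unmatched falls back to
# defaults.unknown_response.
responses:
  "what colour is the sky?": "The sky is blue during a clear day due to a phenomenon called Rayleigh scattering."

defaults:
  unknown_response: "I don't have a response configured for that prompt."
```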
diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..2abde4e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,79 @@ +[build-system] +requires = ["setuptools>=45", "wheel", "setuptools_scm>=6.2"] +build-backend = "setuptools.build_meta" + +[project] +name = "mockllm" +dynamic = ["version"] +description = "A mock server that mimics OpenAI and Anthropic API formats for testing" +readme = "README.md" +requires-python = ">=3.8" +license = {text = "Apache-2.0"} +keywords = ["mock", "llm", "openai", "anthropic", "testing"] +authors = [ + {name = "Luke Hinds", email = "luke@stacklok.com"} +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: Software Development :: Testing", +] +dependencies = [ + "fastapi>=0.68.0", + "uvicorn>=0.15.0", + "pydantic>=2.0.0", + "python-json-logger>=2.0.0", + "pyyaml>=5.4.1", + "watchdog>=2.1.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=6.0", + "pytest-asyncio>=0.15.0", + "black>=21.0", + "isort>=5.0", + "mypy>=0.900", + "ruff>=0.1.0", +] + +[project.urls] +Homepage = "https://github.com/stacklok/mockllm" +Repository = "https://github.com/stacklok/mockllm.git" +Issues = "https://github.com/stacklok/mockllm/issues" + +[tool.setuptools] +packages = ["mockllm"] +package-dir = {"" = "src"} + +[tool.setuptools_scm] +write_to = "src/mockllm/_version.py" + +[tool.black] +line-length = 88 +target-version = ["py38"] +include = '\.pyi?$' + +[tool.isort] +profile = "black" +multi_line_output = 3 +line_length = 88 + +[tool.mypy] +python_version = "3.8" +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = true +check_untyped_defs = true + +[tool.ruff] +line-length = 88 +target-version = "py38" +select = ["E", "F", "B", "I"] +ignore = [] \ No newline at end of file From b803170eca49a87541f0d648ee38b348a1da8d9d Mon Sep 17 00:00:00 2001 From: Luke Hinds Date: Fri, 14 Feb 2025 14:12:53 +0000 Subject: [PATCH 2/2] Formatting --- main.py | 2 +- src/mockllm/__init__.py | 2 +- src/mockllm/_version.py | 17 ++++++ src/mockllm/config.py | 24 +++++--- src/mockllm/models.py | 27 ++++++++- src/mockllm/server.py | 131 ++++++++++++++++++++-------------------- 6 files changed, 123 insertions(+), 80 deletions(-) create mode 100644 src/mockllm/_version.py diff --git a/main.py b/main.py index 6968ba6..3d4740a 100644 --- a/main.py +++ b/main.py @@ -3,4 +3,4 @@ from src.mockllm.server import app if __name__ == "__main__": - uvicorn.run(app, host="0.0.0.0", port=8000, reload=True) \ No newline at end of file + uvicorn.run(app, host="0.0.0.0", port=8000, reload=True) diff --git a/src/mockllm/__init__.py b/src/mockllm/__init__.py index 39a6a59..d5a7792 100644 --- a/src/mockllm/__init__.py +++ b/src/mockllm/__init__.py @@ -2,4 +2,4 @@ Mock LLM Server - You will do what I tell you! """ -__version__ = "0.1.0" \ No newline at end of file +__version__ = "0.1.0" diff --git a/src/mockllm/_version.py b/src/mockllm/_version.py new file mode 100644 index 0000000..9bd6f7c --- /dev/null +++ b/src/mockllm/_version.py @@ -0,0 +1,17 @@ +# file generated by setuptools_scm +# don't change, don't track in version control +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Tuple, Union + + VERSION_TUPLE = Tuple[Union[int, str], ...] 
+else: + VERSION_TUPLE = object + +version: str +__version__: str +__version_tuple__: VERSION_TUPLE +version_tuple: VERSION_TUPLE + +__version__ = version = "0.1.dev13+gb4dbfaf" +__version_tuple__ = version_tuple = (0, 1, "dev13", "gb4dbfaf") diff --git a/src/mockllm/config.py b/src/mockllm/config.py index 4943726..83646d1 100644 --- a/src/mockllm/config.py +++ b/src/mockllm/config.py @@ -11,6 +11,7 @@ logging.basicConfig(level=logging.INFO, handlers=[log_handler]) logger = logging.getLogger(__name__) + class ResponseConfig: """Handles loading and managing response configurations from YAML.""" @@ -26,19 +27,20 @@ def load_responses(self) -> None: try: current_mtime = Path(self.yaml_path).stat().st_mtime if current_mtime > self.last_modified: - with open(self.yaml_path, 'r') as f: + with open(self.yaml_path, "r") as f: data = yaml.safe_load(f) - self.responses = data.get('responses', {}) - self.default_response = data.get('defaults', {}).get( - 'unknown_response', self.default_response + self.responses = data.get("responses", {}) + self.default_response = data.get("defaults", {}).get( + "unknown_response", self.default_response ) self.last_modified = current_mtime - logger.info(f"Loaded {len(self.responses)} responses from {self.yaml_path}") + logger.info( + f"Loaded {len(self.responses)} responses from {self.yaml_path}" + ) except Exception as e: logger.error(f"Error loading responses: {str(e)}") raise HTTPException( - status_code=500, - detail="Failed to load response configuration" + status_code=500, detail="Failed to load response configuration" ) def get_response(self, prompt: str) -> str: @@ -46,14 +48,16 @@ def get_response(self, prompt: str) -> str: self.load_responses() # Check for updates return self.responses.get(prompt.lower().strip(), self.default_response) - def get_streaming_response(self, prompt: str, chunk_size: Optional[int] = None) -> str: + def get_streaming_response( + self, prompt: str, chunk_size: Optional[int] = None + ) -> str: """Generator that yields response content character by character or in chunks.""" response = self.get_response(prompt) if chunk_size: # Yield response in chunks for i in range(0, len(response), chunk_size): - yield response[i:i + chunk_size] + yield response[i : i + chunk_size] else: # Yield response character by character for char in response: - yield char \ No newline at end of file + yield char diff --git a/src/mockllm/models.py b/src/mockllm/models.py index ae3f62e..ef2f671 100644 --- a/src/mockllm/models.py +++ b/src/mockllm/models.py @@ -1,42 +1,54 @@ import time import uuid -from typing import Dict, List, Optional, Literal +from typing import Dict, List, Literal, Optional from pydantic import BaseModel, Field + # OpenAI Models class OpenAIMessage(BaseModel): """OpenAI chat message model.""" + role: str content: str + class OpenAIChatRequest(BaseModel): """OpenAI chat completion request model.""" + model: str messages: List[OpenAIMessage] temperature: Optional[float] = Field(default=0.7) max_tokens: Optional[int] = Field(default=150) stream: Optional[bool] = Field(default=False) + class OpenAIDeltaMessage(BaseModel): """OpenAI streaming delta message model.""" + role: Optional[str] = None content: Optional[str] = None + class OpenAIStreamChoice(BaseModel): """OpenAI streaming choice model.""" + delta: OpenAIDeltaMessage index: int = 0 finish_reason: Optional[str] = None + class OpenAIChatChoice(BaseModel): """OpenAI regular chat choice model.""" + message: OpenAIMessage index: int = 0 finish_reason: str = "stop" + class 
OpenAIChatResponse(BaseModel): """OpenAI chat completion response model.""" + id: str = Field(default_factory=lambda: f"mock-{uuid.uuid4()}") object: str = "chat.completion" created: int = Field(default_factory=lambda: int(time.time())) @@ -44,30 +56,38 @@ class OpenAIChatResponse(BaseModel): choices: List[Dict] usage: Dict[str, int] + class OpenAIStreamResponse(BaseModel): """OpenAI streaming response model.""" + id: str = Field(default_factory=lambda: f"mock-{uuid.uuid4()}") object: str = "chat.completion.chunk" created: int = Field(default_factory=lambda: int(time.time())) model: str choices: List[OpenAIStreamChoice] + # Anthropic Models class AnthropicMessage(BaseModel): """Anthropic message model.""" + role: Literal["user", "assistant"] content: str + class AnthropicChatRequest(BaseModel): """Anthropic chat completion request model.""" + model: str max_tokens: Optional[int] = Field(default=1024) messages: List[AnthropicMessage] stream: Optional[bool] = Field(default=False) temperature: Optional[float] = Field(default=1.0) + class AnthropicChatResponse(BaseModel): """Anthropic chat completion response model.""" + id: str = Field(default_factory=lambda: f"mock-{uuid.uuid4()}") type: str = "message" role: str = "assistant" @@ -77,19 +97,24 @@ class AnthropicChatResponse(BaseModel): stop_sequence: Optional[str] = None usage: Dict[str, int] + class AnthropicStreamDelta(BaseModel): """Anthropic streaming delta model.""" + type: str = "content_block_delta" index: int = 0 delta: Dict[str, str] + class AnthropicStreamResponse(BaseModel): """Anthropic streaming response model.""" + type: str = "message_delta" id: str = Field(default_factory=lambda: f"mock-{uuid.uuid4()}") delta: AnthropicStreamDelta usage: Optional[Dict[str, int]] = None + # For backward compatibility Message = OpenAIMessage ChatRequest = OpenAIChatRequest diff --git a/src/mockllm/server.py b/src/mockllm/server.py index aa90469..b0d641d 100644 --- a/src/mockllm/server.py +++ b/src/mockllm/server.py @@ -6,10 +6,17 @@ from pythonjsonlogger import jsonlogger from .config import ResponseConfig -from .models import (OpenAIChatRequest, OpenAIChatResponse, OpenAIDeltaMessage, - OpenAIStreamChoice, OpenAIStreamResponse, - AnthropicChatRequest, AnthropicChatResponse, - AnthropicStreamResponse, AnthropicStreamDelta) +from .models import ( + AnthropicChatRequest, + AnthropicChatResponse, + AnthropicStreamDelta, + AnthropicStreamResponse, + OpenAIChatRequest, + OpenAIChatResponse, + OpenAIDeltaMessage, + OpenAIStreamChoice, + OpenAIStreamResponse, +) log_handler = logging.StreamHandler() log_handler.setFormatter(jsonlogger.JsonFormatter()) @@ -20,16 +27,13 @@ response_config = ResponseConfig() + async def openai_stream_response(content: str, model: str) -> AsyncGenerator[str, None]: """Generate OpenAI-style streaming response in SSE format.""" # Send the first message with role first_chunk = OpenAIStreamResponse( model=model, - choices=[ - OpenAIStreamChoice( - delta=OpenAIDeltaMessage(role="assistant") - ) - ] + choices=[OpenAIStreamChoice(delta=OpenAIDeltaMessage(role="assistant"))], ) yield f"data: {first_chunk.model_dump_json()}\n\n" @@ -37,64 +41,60 @@ async def openai_stream_response(content: str, model: str) -> AsyncGenerator[str for chunk in response_config.get_streaming_response(content): chunk_response = OpenAIStreamResponse( model=model, - choices=[ - OpenAIStreamChoice( - delta=OpenAIDeltaMessage(content=chunk) - ) - ] + choices=[OpenAIStreamChoice(delta=OpenAIDeltaMessage(content=chunk))], ) yield f"data: 
{chunk_response.model_dump_json()}\n\n" # Send the final message final_chunk = OpenAIStreamResponse( model=model, - choices=[ - OpenAIStreamChoice( - delta=OpenAIDeltaMessage(), - finish_reason="stop" - ) - ] + choices=[OpenAIStreamChoice(delta=OpenAIDeltaMessage(), finish_reason="stop")], ) yield f"data: {final_chunk.model_dump_json()}\n\n" yield "data: [DONE]\n\n" -async def anthropic_stream_response(content: str, model: str) -> AsyncGenerator[str, None]: + +async def anthropic_stream_response( + content: str, model: str +) -> AsyncGenerator[str, None]: """Generate Anthropic-style streaming response in SSE format.""" for chunk in response_config.get_streaming_response(content): stream_response = AnthropicStreamResponse( - delta=AnthropicStreamDelta( - delta={"text": chunk} - ) + delta=AnthropicStreamDelta(delta={"text": chunk}) ) yield f"data: {stream_response.model_dump_json()}\n\n" - + yield "data: [DONE]\n\n" + @app.post("/v1/chat/completions", response_model=None) -async def openai_chat_completion(request: OpenAIChatRequest) -> Union[OpenAIChatResponse, StreamingResponse]: +async def openai_chat_completion( + request: OpenAIChatRequest, +) -> Union[OpenAIChatResponse, StreamingResponse]: """Handle chat completion requests, supporting both regular and streaming responses.""" try: - logger.info("Received chat completion request", extra={ - "model": request.model, - "message_count": len(request.messages), - "stream": request.stream - }) + logger.info( + "Received chat completion request", + extra={ + "model": request.model, + "message_count": len(request.messages), + "stream": request.stream, + }, + ) last_message = next( - (msg for msg in reversed(request.messages) if msg.role == "user"), - None + (msg for msg in reversed(request.messages) if msg.role == "user"), None ) if not last_message: raise HTTPException( - status_code=400, - detail="No user message found in request" + status_code=400, detail="No user message found in request" ) if request.stream: return StreamingResponse( openai_stream_response(last_message.content, request.model), - media_type="text/event-stream" + media_type="text/event-stream", ) response_content = response_config.get_response(last_message.content) @@ -106,53 +106,53 @@ async def openai_chat_completion(request: OpenAIChatRequest) -> Union[OpenAIChat return OpenAIChatResponse( model=request.model, - choices=[{ - "index": 0, - "message": { - "role": "assistant", - "content": response_content - }, - "finish_reason": "stop" - }], + choices=[ + { + "index": 0, + "message": {"role": "assistant", "content": response_content}, + "finish_reason": "stop", + } + ], usage={ "prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens, - "total_tokens": total_tokens - } + "total_tokens": total_tokens, + }, ) except Exception as e: logger.error(f"Error processing request: {str(e)}") - raise HTTPException( - status_code=500, - detail=f"Internal server error: {str(e)}" - ) + raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") + @app.post("/v1/messages", response_model=None) -async def anthropic_chat_completion(request: AnthropicChatRequest) -> Union[AnthropicChatResponse, StreamingResponse]: +async def anthropic_chat_completion( + request: AnthropicChatRequest, +) -> Union[AnthropicChatResponse, StreamingResponse]: """Handle Anthropic chat completion requests, supporting both regular and streaming responses.""" try: - logger.info("Received Anthropic chat completion request", extra={ - "model": request.model, - "message_count": 
len(request.messages), - "stream": request.stream - }) + logger.info( + "Received Anthropic chat completion request", + extra={ + "model": request.model, + "message_count": len(request.messages), + "stream": request.stream, + }, + ) last_message = next( - (msg for msg in reversed(request.messages) if msg.role == "user"), - None + (msg for msg in reversed(request.messages) if msg.role == "user"), None ) if not last_message: raise HTTPException( - status_code=400, - detail="No user message found in request" + status_code=400, detail="No user message found in request" ) if request.stream: return StreamingResponse( anthropic_stream_response(last_message.content, request.model), - media_type="text/event-stream" + media_type="text/event-stream", ) response_content = response_config.get_response(last_message.content) @@ -168,13 +168,10 @@ async def anthropic_chat_completion(request: AnthropicChatRequest) -> Union[Anth usage={ "input_tokens": prompt_tokens, "output_tokens": completion_tokens, - "total_tokens": total_tokens - } + "total_tokens": total_tokens, + }, ) except Exception as e: logger.error(f"Error processing request: {str(e)}") - raise HTTPException( - status_code=500, - detail=f"Internal server error: {str(e)}" - ) + raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
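End-to-end, the two commits leave the server exposing `/v1/chat/completions` (OpenAI-style) and `/v1/messages` (Anthropic-style). A minimal smoke-test sketch against a locally running instance, assuming the `requests` package is installed and the prompt below is configured in `responses.yml`:

```python
# Hypothetical client-side check -- not part of the patch. Start the server
# first (python -m mockllm), then run this script.
import requests

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "mock-llm",
        "messages": [{"role": "user", "content": "what colour is the sky?"}],
        "stream": False,
    },
    timeout=10,
)
resp.raise_for_status()
body = resp.json()

# The mock always answers from responses.yml and reports fabricated usage.
print(body["choices"][0]["message"]["content"])
print(body["usage"])
```

Setting `"stream": true` in the payload instead returns a `text/event-stream` response terminated by `data: [DONE]`, matching the streaming handlers above.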