diff --git a/.gitignore b/.gitignore index 8661818d85..48303bc0a5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .env .vscode +.DS_Store diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000..dc8841f063 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2025 H3 Labs Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000000..e227739c01 --- /dev/null +++ b/README.md @@ -0,0 +1,399 @@ +# Bifrost + +Bifrost is an open-source middleware that serves as a unified gateway to various AI model providers, enabling seamless integration and fallback mechanisms for your AI-powered applications. 
+ +## ๐Ÿ“‘ Table of Contents + +- [Bifrost](#bifrost) + - [๐Ÿ“‘ Table of Contents](#-table-of-contents) + - [๐Ÿ” Overview](#-overview) + - [โœจ Features](#-features) + - [๐Ÿ—๏ธ Repository Structure](#๏ธ-repository-structure) + - [๐Ÿ“Š Benchmarks](#-benchmarks) + - [Test Environment](#test-environment) + - [t3.medium Instance](#t3medium-instance) + - [t3.xlarge Instance](#t3xlarge-instance) + - [Performance Metrics](#performance-metrics) + - [Key Performance Highlights](#key-performance-highlights) + - [๐Ÿš€ Getting Started](#-getting-started) + - [Package Structure](#package-structure) + - [Prerequisites](#prerequisites) + - [Setting up Bifrost](#setting-up-bifrost) + - [Additional Configurations](#additional-configurations) + - [๐Ÿค Contributing](#-contributing) + - [๐Ÿ“„ License](#-license) + +--- + +## ๐Ÿ” Overview + +Bifrost acts as a bridge between your applications and multiple AI providers (OpenAI, Anthropic, Amazon Bedrock, etc.). It provides a consistent API interface while handling: + +- Authentication and key management +- Request routing and load balancing +- Fallback mechanisms for reliability +- Unified request and response formatting +- Connection pooling and concurrency control + +With Bifrost, you can focus on building your AI-powered applications without worrying about the underlying provider-specific implementations. It handles all the complexities of key and provider management, providing a fixed input and output format so you don't need to modify your codebase for different providers. 
+ +--- + +## โœจ Features + +- **Multi-Provider Support**: Integrate with OpenAI, Anthropic, Amazon Bedrock, and more through a single API +- **Fallback Mechanisms**: Automatically retry failed requests with alternative models or providers +- **Dynamic Key Management**: Rotate and manage API keys efficiently +- **Connection Pooling**: Optimize network resources for better performance +- **Concurrency Control**: Manage rate limits and parallel requests effectively +- **HTTP Transport**: RESTful API interface for easy integration +- **Custom Configuration**: Flexible JSON-based configuration + +--- + +## ๐Ÿ—๏ธ Repository Structure + +Bifrost is built with a modular architecture: + +``` +bifrost/ +โ”œโ”€โ”€ core/ # Core functionality and shared components +โ”‚ โ”œโ”€โ”€ providers/ # Provider-specific implementations +โ”‚ โ”œโ”€โ”€ schemas/ # Interfaces and structs used in bifrost +โ”‚ โ”œโ”€โ”€ tests/ # Tests to make sure everything is in place +โ”‚ โ”œโ”€โ”€ bifrost.go # Main Bifrost implementation +โ”‚ +โ”œโ”€โ”€ transports/ # Interface layers (HTTP, gRPC, etc.) +โ”‚ โ”œโ”€โ”€ http/ # HTTP transport implementation +โ”‚ โ””โ”€โ”€ ... +โ”‚ +โ””โ”€โ”€ plugins/ # Plugin Implementations + โ”œโ”€โ”€ maxim-logger.go + โ””โ”€โ”€ ... +``` + +The system uses a provider-agnostic approach with well-defined interfaces to easily extend to new AI providers. All interfaces are defined in `core/schemas/` and can be used as a reference for adding new plugins. + +--- + +## ๐Ÿ“Š Benchmarks + +Bifrost has been tested under high load conditions to ensure optimal performance. The following results were obtained from benchmark tests running at 5000 requests per second (RPS) on different AWS EC2 instances, with Bifrost running inside Docker containers. 
+ +### Test Environment + +#### t3.medium Instance +- **Instance**: AWS EC2 t3.medium +- **vCPUs**: 2 +- **Memory**: 4GB RAM +- **Container**: Docker container with resource limits matching instance specs +- **Bifrost Configurations**: + - Buffer Size: 15,000 + - Initial Pool Size: 10,000 + +#### t3.xlarge Instance +- **Instance**: AWS EC2 t3.xlarge +- **vCPUs**: 4 +- **Memory**: 16GB RAM +- **Container**: Docker container with resource limits matching instance specs +- **Bifrost Configurations**: + - Buffer Size: 20,000 + - Initial Pool Size: 15,000 + +### Performance Metrics + +| Metric | t3.medium | t3.xlarge | +|--------|-----------|-----------| +| Success Rate | 100.00% | 100.00% | +| Average Request Size | 0.13 KB | 0.13 KB | +| **Average Response Size** | **`1.37 KB`** | **`10.32 KB`** | +| Average Latency | 2.12s | 1.61s | +| Peak Memory Usage | 1312.79 MB | 3340.44 MB | +| Queue Wait Time | 47.13 ยตs | 1.67 ยตs | +| Key Selection Time | 16 ns | 10 ns | +| Message Formatting | 2.19 ยตs | 2.11 ยตs | +| Params Preparation | 436 ns | 417 ns | +| Request Body Preparation | 2.65 ยตs | 2.36 ยตs | +| JSON Marshaling | 63.47 ยตs | 26.80 ยตs | +| Request Setup | 6.59 ยตs | 7.17 ยตs | +| HTTP Request | 1.56s | 1.50s | +| Error Handling | 189 ns | 162 ns | +| Response Parsing | 11.30 ms | 2.11 ms | + +### Key Performance Highlights + +- **Perfect Success Rate**: 100% request success rate under high load on both instances +- **Efficient Queue Management**: Minimal queue wait time (1.67 ยตs on t3.xlarge) +- **Fast Key Selection**: Near-instantaneous key selection (10 ns on t3.xlarge) +- **Optimized Memory Usage**: + - t3.medium: ~1.3GB at 5000 RPS + - t3.xlarge: ~3.3GB at 5000 RPS +- **Efficient Request Processing**: Most operations complete in microseconds +- **Network Efficiency**: + - Consistent small request sizes (0.13 KB) across instances + - Larger response sizes on t3.xlarge (10.32 KB vs 1.37 KB) due to more detailed responses +- **Improved Performance on 
t3.xlarge**: + - 24% faster average latency + - 81% faster response parsing + - 58% faster JSON marshaling + - Significantly reduced queue wait times + - Higher buffer and pool sizes enabled by increased resources + +These benchmarks demonstrate Bifrost's ability to handle high-throughput scenarios while maintaining reliability and performance, even when containerized. The t3.xlarge instance shows improved performance across most metrics, particularly in processing times and latency, while maintaining the same high reliability and success rate. The larger response sizes on t3.xlarge indicate its ability to handle more detailed responses without compromising performance. + +One of Bifrost's key strengths is its flexibility in configuration. You can freely decide the tradeoff between memory usage and processing speed by adjusting Bifrost's configurations: + +- **Memory vs Speed Tradeoff**: + - Higher buffer and pool sizes (like in t3.xlarge) improve speed but use more memory + - Lower configurations (like in t3.medium) use less memory but may have slightly higher latencies + - You can fine-tune these parameters based on your specific needs and available resources + +- **Customizable Parameters**: + - Buffer Size: Controls the maximum number of concurrent requests + - Initial Pool Size: Determines the initial allocation of resources + - Concurrency Settings: Adjustable per provider + - Retry and Timeout Configurations: Customizable based on your requirements + +This flexibility allows you to optimize Bifrost for your specific use case, whether you prioritize speed, memory efficiency, or a balance between the two. + +--- + +## ๐Ÿš€ Getting Started + +If you want to **set up the Bifrost API quickly**, [check the transports documentation](https://github.com/maximhq/bifrost/tree/main/transports/README.md). + +### Package Structure + +Bifrost is divided into three Go packages: core, plugins, and transports. + +1. 
**core**: This package contains the core implementation of Bifrost as a Go package. + +2. **plugins**: This package serves as an extension to core. You can download this package using `go get github.com/maximhq/bifrost/plugins` and pass the plugins while initializing Bifrost. + + ```golang + plugin, err := plugins.NewMaximLoggerPlugin(os.Getenv("MAXIM_API_KEY"), os.Getenv("MAXIM_LOGGER_ID")) + if err != nil { + return nil, err + } + + // Initialize Bifrost + client, err := bifrost.Init(schemas.BifrostConfig{ + Account: &account, + Plugins: []schemas.Plugin{plugin}, + }) + ``` + +3. **transports**: This package contains transport clients like HTTP to expose your Bifrost client. You can either `go get` this package or directly use the independent Dockerfile to quickly spin up your Bifrost API interface ([Click here](https://github.com/maximhq/bifrost/tree/main/transports/README.md) to read more on this). + +### Prerequisites + +- Go 1.23 or higher +- Access to at least one AI model provider (OpenAI, Anthropic, etc.) +- API keys for the providers you wish to use + +### Setting up Bifrost + +1. Setting up your account: You first need to create your account which follows [Bifrost's account interface](https://github.com/maximhq/bifrost/blob/main/core/schemas/account.go). 
+ +Example: + ```golang + type BaseAccount struct{} + + func (baseAccount *BaseAccount) GetConfiguredProviders() ([]schemas.ModelProvider, error) { + return []schemas.ModelProvider{schemas.OpenAI}, nil + } + + func (baseAccount *BaseAccount) GetKeysForProvider(providerKey schemas.ModelProvider) ([]schemas.Key, error) { + switch providerKey { + case schemas.OpenAI: + return []schemas.Key{ + { + Value: os.Getenv("OPENAI_API_KEY"), + Models: []string{"gpt-4o-mini"}, + }, + }, nil + default: + return nil, fmt.Errorf("unsupported provider: %s", providerKey) + } + } + + func (baseAccount *BaseAccount) GetConfigForProvider(providerKey schemas.ModelProvider) (*schemas.ProviderConfig, error) { + switch providerKey { + case schemas.OpenAI: + return &schemas.ProviderConfig{ + ConcurrencyAndBufferSize: schemas.ConcurrencyAndBufferSize{ + Concurrency: 3, + BufferSize: 10, + }, + }, nil + default: + return nil, fmt.Errorf("unsupported provider: %s", providerKey) + } + } + ``` + +Bifrost uses these methods to get all the keys and configurations it needs to call the providers. You can check the [Additional Configurations](#additional-configurations) section for further customizations. + +2. Get bifrost core package: Simply run `go get github.com/maximhq/bifrost/core` to download bifrost/core package. + +3. Initialising Bifrost: Initialise bifrost by providing your account implementation + +```golang +client, err := bifrost.Init(schemas.BifrostConfig{ + Account: &yourAccount, +}) +``` + +4. Make your First LLM Call! + +```golang + msg := "What is an LLM gateway?" 
+ messages := []schemas.Message{ + { Role: schemas.RoleUser, Content: &msg }, + } + + bifrostResult, bifrostErr := bifrost.ChatCompletionRequest( + schemas.OpenAI, &schemas.BifrostRequest{ + Model: "gpt-4o", // make sure you have configured gpt-4o in your account interface + Input: schemas.RequestInput{ + ChatCompletionInput: &messages, + }, + }, context.Background() + ) +``` + +You can add model parameters by passing them in `Params: &schemas.ModelParameters{...yourParams}` in your ChatCompletionRequest. + +### Additional Configurations + +1. InitialPoolSize and DropExcessRequests: You can customise the initial pool size of the structs and channels bifrost creates on `bifrost.Init()`. A higher value would mean fewer runtime allocations and lower latency but at the cost of more memory usage. Takes the defined default value if not provided. + +```golang + client, err := bifrost.Init(schemas.BifrostConfig{ + Account: &yourAccount, + InitialPoolSize: 500, + DropExcessRequests: true, + }) +``` + +When `DropExcessRequests` is set to true, in cases where the queue is full, requests will not wait for the queue to be empty and will be dropped instead. By default it is set to false. + +2. Logger: Like account interface, bifrost also allows you to pass your custom logger if it follows [bifrost's logger interface](https://github.com/maximhq/bifrost/blob/main/core/schemas/logger.go). Takes in the [default logger](https://github.com/maximhq/bifrost/blob/main/core/logger.go) if not provided. + +```golang + client, err := bifrost.Init(schemas.BifrostConfig{ + Account: &yourAccount, + Logger: &yourLogger, + }) +``` + +The default logger is set to level info by default. If you wish to use it but with a different log level, you can do it like this - + +```golang + client, err := bifrost.Init(schemas.BifrostConfig{ + Account: &yourAccount, + Logger: bifrost.NewDefaultLogger(schemas.LogLevelDebug), + }) +``` + +3. 
Plugins: You can create and pass your custom pre-hook and post-hook plugins to bifrost as long as they follow [bifrost's plugin interface](https://github.com/maximhq/bifrost/blob/main/core/schemas/plugin.go). + +```golang + client, err := bifrost.Init(schemas.BifrostConfig{ + Account: &yourAccount, + Plugins: []schemas.Plugin{yourPlugin1, yourPlugin2, ...}, + }) +``` + +4. Customise your provider settings: You can customise proxy config, timeouts, retry settings, concurrency, and buffer sizes for each of your providers in your account interface's GetConfigForProvider() method. + +example: +```golang + schemas.ProviderConfig{ + NetworkConfig: schemas.NetworkConfig{ + DefaultRequestTimeoutInSeconds: 30, + MaxRetries: 2, + RetryBackoffInitial: 100 * time.Millisecond, + RetryBackoffMax: 2 * time.Second, + }, + MetaConfig: &meta.BedrockMetaConfig{ + SecretAccessKey: os.Getenv("BEDROCK_ACCESS_KEY"), + Region: StrPtr("us-east-1"), + }, + ConcurrencyAndBufferSize: schemas.ConcurrencyAndBufferSize{ + Concurrency: 3, + BufferSize: 10, + }, + ProxyConfig: &schemas.ProxyConfig{ + Type: schemas.HttpProxy, + URL: yourProxyURL, + }, + } +``` + +You can manage buffer size (maximum number of requests you want to hold in the system) and concurrency (maximum number of requests you want to be made concurrently) for each provider. You can manage user usage and provider limits by providing these custom provider settings. Default values are taken for network config, concurrency and buffer sizes if not provided. + +Bifrost also supports multiple API keys per provider, enabling both load balancing and redundancy. You can assign weights to each key to control how frequently they are selected for requests. By default, all keys are treated with equal weight unless specified otherwise. 
+ +```golang + []schemas.Key{ + { + Value: os.Getenv("OPEN_AI_API_KEY1"), + Models: []string{"gpt-4o-mini", "gpt-4-turbo"}, + Weight: 0.6, + }, + { + Value: os.Getenv("OPEN_AI_API_KEY2"), + Models: []string{"gpt-4-turbo"}, + Weight: 0.3, + }, + { + Value: os.Getenv("OPEN_AI_API_KEY3"), + Models: []string{"gpt-4o-mini"}, + Weight: 0.1, + }, + } +``` + +You can check [this](https://github.com/maximhq/bifrost/blob/main/core/tests/account.go) file to refer to all the customisation settings. + +5. Fallbacks: You can define fallback providers for each request, which will be used if all retry attempts with your primary provider fail. These fallback providers are attempted in the order you specify, provided they are configured in your account at runtime. Once a fallback is triggered, its own retry settings will apply, rather than those of the original provider. + +```golang + result, err := bifrost.ChatCompletionRequest( + schemas.OpenAI, &schemas.BifrostRequest{ + Model: "gpt-4o", + Input: schemas.RequestInput{ + ChatCompletionInput: &messages, + }, + Fallbacks: []schemas.Fallback{ + { + Provider: schemas.Anthropic, + Model: "claude-3-5-sonnet-20240620", // make sure you have configured this + }, + }, + }, context.Background() + ) +``` + +--- + +## ๐Ÿค Contributing + +Contributions are welcome! We appreciate all kinds of contributions โ€” bug fixes, features, docs, and ideas. Please feel free to submit a Pull Request. + +1. Fork the repository +2. Create your feature branch (`git checkout -b feature/amazing-feature`) +3. Commit your changes (`git commit -m 'Add some amazing feature'`) +4. Push to the branch (`git push origin feature/amazing-feature`) +5. Open a Pull Request and describe your changes + +--- + +## ๐Ÿ“„ License + +This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details. 
+ +--- + +Built with โค๏ธ by [Maxim](https://github.com/maximhq) diff --git a/core/tests/account.go b/core/tests/account.go index ca1aab3ac1..53278b7ce3 100644 --- a/core/tests/account.go +++ b/core/tests/account.go @@ -41,7 +41,7 @@ func (baseAccount *BaseAccount) GetConfiguredProviders() ([]schemas.ModelProvide // - error: An error if the provider is not supported // // Environment Variables Used: -// - OPEN_AI_API_KEY: API key for OpenAI +// - OPENAI_API_KEY: API key for OpenAI // - ANTHROPIC_API_KEY: API key for Anthropic // - BEDROCK_API_KEY: API key for AWS Bedrock // - COHERE_API_KEY: API key for Cohere @@ -51,7 +51,7 @@ func (baseAccount *BaseAccount) GetKeysForProvider(providerKey schemas.ModelProv case schemas.OpenAI: return []schemas.Key{ { - Value: os.Getenv("OPEN_AI_API_KEY"), + Value: os.Getenv("OPENAI_API_KEY"), Models: []string{"gpt-4o-mini", "gpt-4-turbo"}, Weight: 1.0, }, diff --git a/readMe.md b/readMe.md deleted file mode 100644 index 597cbc8560..0000000000 --- a/readMe.md +++ /dev/null @@ -1,277 +0,0 @@ -# Bifrost - -![Bifrost Logo](https://via.placeholder.com/150x150.png?text=Bifrost) - -## ๐ŸŒˆ The Bridge Between Your Application and AI Providers - -Bifrost is an open-source middleware that serves as a unified gateway to various AI model providers, enabling seamless integration and fallback mechanisms for your AI-powered applications. 
- ---- - -## ๐Ÿ“‹ Table of Contents - -- [Overview](#overview) -- [Features](#features) -- [Architecture](#architecture) -- [Getting Started](#getting-started) - - [Prerequisites](#prerequisites) - - [Installation](#installation) - - [Docker Setup](#docker-setup) -- [Configuration](#configuration) -- [Usage](#usage) - - [HTTP Transport](#http-transport) - - [Text Completions](#text-completions) - - [Chat Completions](#chat-completions) -- [Advanced Features](#advanced-features) - - [Fallbacks](#fallbacks) - - [Key Management](#key-management) - - [Concurrency Control](#concurrency-control) -- [API Reference](#api-reference) -- [Contributing](#contributing) -- [License](#license) - ---- - -## ๐Ÿ” Overview - -Bifrost acts as a bridge between your applications and multiple AI providers (OpenAI, Anthropic, Amazon Bedrock, etc.). It provides a consistent API interface while handling: - -- Authentication & key management -- Request routing & load balancing -- Fallback mechanisms for reliability -- Unified response formatting -- Connection pooling & concurrency control - -With Bifrost, you can focus on building your AI-powered applications without worrying about the underlying provider-specific implementations. 
- ---- - -## โœจ Features - -- **Multi-Provider Support**: Integrate with OpenAI, Anthropic, Amazon Bedrock, and more through a single API -- **Fallback Mechanisms**: Automatically retry failed requests with alternative models or providers -- **Dynamic Key Management**: Rotate and manage API keys efficiently -- **Connection Pooling**: Optimize network resources for better performance -- **Concurrency Control**: Manage rate limits and parallel requests effectively -- **HTTP Transport**: RESTful API interface for easy integration -- **Custom Configuration**: Flexible JSON-based configuration - ---- - -## ๐Ÿ—๏ธ Architecture - -Bifrost is built with a modular architecture: - -``` -bifrost/ -โ”œโ”€โ”€ core/ # Core functionality and shared components -โ”œโ”€โ”€ transports/ # Interface layers (HTTP, gRPC, etc.) -โ”‚ โ”œโ”€โ”€ http/ # HTTP transport implementation -โ”‚ โ””โ”€โ”€ ... -โ””โ”€โ”€ providers/ # AI provider-specific implementations - โ”œโ”€โ”€ openai/ - โ”œโ”€โ”€ anthropic/ - โ”œโ”€โ”€ bedrock/ - โ””โ”€โ”€ ... -``` - -The system uses a provider-agnostic approach with well-defined interfaces to easily extend to new AI providers. - ---- - -## ๐Ÿš€ Getting Started - -### Prerequisites - -- Go 1.18 or higher -- Access to at least one AI model provider (OpenAI, Anthropic, etc.) -- API keys for the providers you wish to use - -### Installation - -```bash -# Clone the repository -git clone https://github.com/maximhq/bifrost.git -cd bifrost - -# Build the HTTP transport -cd transports -go build -o bifrost ./http -``` - -### Docker Setup - -You can also run Bifrost using Docker: - -```bash -docker build \ - --build-arg CONFIG_PATH=./config.example.json \ - --build-arg ENV_PATH=./.env \ - --build-arg PORT=8080 \ - --build-arg POOL_SIZE=300 \ - -t bifrost-transports ./transports - -docker run -p 8080:8080 bifrost-transports -``` - ---- - -## โš™๏ธ Configuration - -Bifrost uses a combination of a JSON configuration file and environment variables: - -1. 
Create a configuration file based on the example: - ```bash - cp transports/config.example.json config.json - ``` - -2. Set up your environment variables in a `.env` file: - ```bash - cp transports/.env.example .env - ``` - -3. Edit both files to configure your providers and API keys. - -Example configuration: - -```json -{ - "openai": { - "keys": [ - { - "api_key": "${OPENAI_API_KEY}", - "organization_id": "${OPENAI_ORG_ID}" - } - ], - "network_config": { - "timeout_ms": 30000, - "max_retries": 3 - }, - "concurrency_and_buffer_size": { - "max_concurrency": 10, - "channel_buffer_size": 100 - } - }, - "anthropic": { - "keys": [ - { - "api_key": "${ANTHROPIC_API_KEY}" - } - ] - } -} -``` - ---- - -## ๐Ÿงฐ Usage - -### HTTP Transport - -Start the HTTP server: - -```bash -./bifrost -config config.json -env .env -port 8080 -pool-size 300 -``` - -### Text Completions - -```bash -curl -X POST http://localhost:8080/v1/text/completions \ - -H "Content-Type: application/json" \ - -d '{ - "provider": "openai", - "model": "gpt-4", - "text": "Once upon a time in the land of AI,", - "params": { - "temperature": 0.7, - "max_tokens": 100 - } - }' -``` - -### Chat Completions - -```bash -curl -X POST http://localhost:8080/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{ - "provider": "anthropic", - "model": "claude-3-opus-20240229", - "messages": [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Tell me about Bifrost in Norse mythology."} - ], - "params": { - "temperature": 0.7, - "max_tokens": 500 - } - }' -``` - ---- - -## ๐Ÿ”ง Advanced Features - -### Fallbacks - -Configure fallback options in your requests: - -```json -{ - "provider": "openai", - "model": "gpt-4", - "messages": [...], - "fallbacks": [ - { - "provider": "anthropic", - "model": "claude-3-opus-20240229" - }, - { - "provider": "bedrock", - "model": "anthropic.claude-3-sonnet-20240229-v1:0" - } - ] -} -``` - -### Key Management - -Bifrost supports 
multiple API keys per provider for load balancing and redundancy. - -### Concurrency Control - -Fine-tune concurrency settings per provider to manage rate limits effectively. - ---- - -## ๐Ÿ“˜ API Reference - -### Endpoints - -- `/v1/text/completions`: Text completion requests -- `/v1/chat/completions`: Chat completion requests - -For detailed API documentation, see the [API Reference](docs/api-reference.md). - ---- - -## ๐Ÿค Contributing - -Contributions are welcome! Please feel free to submit a Pull Request. - -1. Fork the repository -2. Create your feature branch (`git checkout -b feature/amazing-feature`) -3. Commit your changes (`git commit -m 'Add some amazing feature'`) -4. Push to the branch (`git push origin feature/amazing-feature`) -5. Open a Pull Request - ---- - -## ๐Ÿ“„ License - -This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. - ---- - -Built with โค๏ธ by [Maxim](https://github.com/maximhq) diff --git a/transports/.env.sample b/transports/.env.sample new file mode 100644 index 0000000000..30e582a355 --- /dev/null +++ b/transports/.env.sample @@ -0,0 +1,10 @@ +OPENAI_API_KEY = YOUR_OPENAI_API_KEY +ANTHROPIC_API_KEY = YOUR_ANTHROPIC_API_KEY +BEDROCK_API_KEY = YOUR_BEDROCK_API_KEY +BEDROCK_ACCESS_KEY = YOUR_BEDROCK_ACCESS_KEY +COHERE_API_KEY = YOUR_COHERE_API_KEY +AZURE_API_KEY = YOUR_AZURE_API_KEY +AZURE_ENDPOINT = YOUR_AZURE_ENDPOINT + +MAXIM_API_KEY = YOUR_MAXIM_API_KEY +MAXIM_LOGGER_ID = YOUR_MAXIM_LOGGER_ID \ No newline at end of file diff --git a/transports/Dockerfile b/transports/Dockerfile index bd2e117bc5..df9ac9901c 100644 --- a/transports/Dockerfile +++ b/transports/Dockerfile @@ -35,10 +35,12 @@ ARG CONFIG_PATH ARG ENV_PATH ARG PORT ARG POOL_SIZE +ARG DROP_EXCESS_REQUESTS # Set default values if args are not provided ENV APP_PORT=${PORT:-8080} ENV APP_POOL_SIZE=${POOL_SIZE:-300} +ENV APP_DROP_EXCESS_REQUESTS=${DROP_EXCESS_REQUESTS:-false} # Copy the config and environment files into the image 
COPY ${CONFIG_PATH} /app/config/config.json @@ -49,7 +51,7 @@ RUN echo '#!/bin/sh' > /app/entrypoint.sh && \ echo 'if [ ! -f /app/config/config.json ]; then echo "Missing config.json"; exit 1; fi' >> /app/entrypoint.sh && \ echo 'if [ ! -f /app/config/.env ]; then echo "Missing .env"; exit 1; fi' >> /app/entrypoint.sh && \ echo 'if [ ! -f /app/main ]; then echo "Missing main binary"; exit 1; fi' >> /app/entrypoint.sh && \ - echo 'exec /app/main -config /app/config/config.json -env /app/config/.env -port "$APP_PORT" -pool-size "$APP_POOL_SIZE"' >> /app/entrypoint.sh && \ + echo 'exec /app/main -config /app/config/config.json -env /app/config/.env -port "$APP_PORT" -pool-size "$APP_POOL_SIZE" -drop-excess-requests "$APP_DROP_EXCESS_REQUESTS"' >> /app/entrypoint.sh && \ chmod +x /app/entrypoint.sh # Expose the port defined by argument diff --git a/transports/README.md b/transports/README.md new file mode 100644 index 0000000000..0f0670a03b --- /dev/null +++ b/transports/README.md @@ -0,0 +1,178 @@ +# Bifrost Transports + +This package contains clients for various transports that can be used to spin up your Bifrost client with just a single line of code. + +## ๐Ÿ“‘ Table of Contents + +- [Bifrost Transports](#bifrost-transports) + - [๐Ÿ“‘ Table of Contents](#-table-of-contents) + - [๐Ÿš€ Setting Up Transports](#-setting-up-transports) + - [Prerequisites](#prerequisites) + - [Configuration](#configuration) + - [Docker Setup](#docker-setup) + - [Go Setup](#go-setup) + - [๐Ÿงฐ Usage](#-usage) + - [Text Completions](#text-completions) + - [Chat Completions](#chat-completions) + - [๐Ÿ”ง Advanced Features](#-advanced-features) + - [Fallbacks](#fallbacks) + +--- + +## ๐Ÿš€ Setting Up Transports + +### Prerequisites +- Go 1.24 or higher (if not using Docker), as required by the module's `go.mod` +- Access to at least one AI model provider (OpenAI, Anthropic, etc.)
+- API keys for the providers you wish to use + +### Configuration + +Bifrost uses a combination of a JSON configuration file and environment variables: + +1. **JSON Configuration File**: Bifrost requires a configuration file to set up the gateway. This includes all your provider-level settings, keys, and meta configs for each of your providers. + +2. **Environment Variables**: If you don't want to include your keys in your config file, you can provide a `.env` file and, in your config, reference each value using the prefix `env.` followed by the name of the corresponding key in your `.env` file. + +```json +{ + "keys": [{ + "value": "env.OPENAI_API_KEY", + "models": ["gpt-4o-mini", "gpt-4-turbo"], + "weight": 1.0 + }] +} +``` + +In this example, `OPENAI_API_KEY` refers to a key in the `.env` file. At runtime, its value will be used to replace the placeholder. + +The same setup applies to keys in meta configs of all providers: + +```json +{ + "meta_config": { + "secret_access_key": "env.BEDROCK_ACCESS_KEY", + "region": "env.BEDROCK_REGION" + } +} +``` + +In this example, `BEDROCK_ACCESS_KEY` and `BEDROCK_REGION` refer to keys in the `.env` file. + +Please refer to `config.example.json` and `.env.sample` for examples. + +### Docker Setup + +You can run Bifrost using our **independent Dockerfile**. Just copy our Dockerfile and run these commands to get your Bifrost instance up and running: + +```bash +docker build \ + --build-arg CONFIG_PATH=./config.example.json \ + --build-arg ENV_PATH=./.env.sample \ + --build-arg PORT=8080 \ + --build-arg POOL_SIZE=300 \ + -t bifrost-transports . + +docker run -p 8080:8080 bifrost-transports +``` + +You can also add the build argument `DROP_EXCESS_REQUESTS=true` (default: `false`) to your Docker build command to drop excess requests when the buffer is full. Read more about `DROP_EXCESS_REQUESTS` and `POOL_SIZE` [here](https://github.com/maximhq/bifrost/tree/main?tab=README-ov-file#additional-configurations). + +--- + +### Go Setup + +If you wish to run Bifrost in your Go environment, follow these steps: + +1.
Install the binary: + +```bash +go install github.com/maximhq/bifrost/transports/http@latest +``` + +2. Run your binary: + +- If it's in your PATH: +```bash +http -config config.json -env .env -port 8080 -pool-size 300 +``` + +- Otherwise: +```bash +./http -config config.json -env .env -port 8080 -pool-size 300 +``` + +You can also add the flag `-drop-excess-requests=true` (default: `false`) to your command to drop excess requests when the buffer is full. Read more about `DROP_EXCESS_REQUESTS` and `POOL_SIZE` [here](https://github.com/maximhq/bifrost/tree/main?tab=README-ov-file#additional-configurations). + +## ๐Ÿงฐ Usage + +Ensure that: +- Bifrost's HTTP server is running +- The providers/models you use are configured in your JSON config file + +### Text Completions + +```bash +curl -X POST http://localhost:8080/v1/text/completions \ + -H "Content-Type: application/json" \ + -d '{ + "provider": "openai", + "model": "gpt-4o-mini", + "text": "Once upon a time in the land of AI,", + "params": { + "temperature": 0.7, + "max_tokens": 100 + } + }' +``` + +### Chat Completions + +```bash +curl -X POST http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "provider": "openai", + "model": "gpt-4o-mini", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Tell me about Bifrost in Norse mythology."} + ], + "params": { + "temperature": 0.8, + "max_tokens": 500 + } + }' +``` + +--- + +## ๐Ÿ”ง Advanced Features + +### Fallbacks + +Configure fallback options in your requests: + +```json +{ + "provider": "openai", + "model": "gpt-4", + "messages": [...], + "fallbacks": [ + { + "provider": "anthropic", + "model": "claude-3-opus-20240229" + }, + { + "provider": "bedrock", + "model": "anthropic.claude-3-sonnet-20240229-v1:0" + } + ] +} +``` + +Read more about fallbacks and other additional configurations
[here](https://github.com/maximhq/bifrost/tree/main?tab=README-ov-file#additional-configurations). + +--- + +Built with โค๏ธ by [Maxim](https://github.com/maximhq) \ No newline at end of file diff --git a/transports/config.example.json b/transports/config.example.json index d769d6991e..159aecac63 100644 --- a/transports/config.example.json +++ b/transports/config.example.json @@ -2,7 +2,7 @@ "OpenAI": { "keys": [ { - "value": "env.OPEN_AI_API_KEY", + "value": "env.OPENAI_API_KEY", "models": ["gpt-4o-mini", "gpt-4-turbo"], "weight": 1.0 } diff --git a/transports/go.mod b/transports/go.mod index 639c9c7a49..c92d309e37 100644 --- a/transports/go.mod +++ b/transports/go.mod @@ -5,7 +5,7 @@ go 1.24.1 require ( github.com/fasthttp/router v1.5.4 github.com/joho/godotenv v1.5.1 - github.com/maximhq/bifrost/core v1.0.1 + github.com/maximhq/bifrost/core v1.0.2 github.com/valyala/fasthttp v1.60.0 ) @@ -24,6 +24,7 @@ require ( github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.33.19 // indirect github.com/aws/smithy-go v1.22.3 // indirect + github.com/goccy/go-json v0.10.5 // indirect github.com/klauspost/compress v1.18.0 // indirect github.com/savsgio/gotils v0.0.0-20240704082632-aef3928b8a38 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect diff --git a/transports/go.sum b/transports/go.sum index 3ef7b92f14..bab9764a15 100644 --- a/transports/go.sum +++ b/transports/go.sum @@ -28,12 +28,16 @@ github.com/aws/smithy-go v1.22.3 h1:Z//5NuZCSW6R4PhQ93hShNbyBbn8BWCmCVCt+Q8Io5k= github.com/aws/smithy-go v1.22.3/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= github.com/fasthttp/router v1.5.4 h1:oxdThbBwQgsDIYZ3wR1IavsNl6ZS9WdjKukeMikOnC8= github.com/fasthttp/router v1.5.4/go.mod h1:3/hysWq6cky7dTfzaaEPZGdptwjwx0qzTgFCKEWRjgc= +github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= +github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= 
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/maximhq/bifrost/core v1.0.1 h1:B0u6o13faUexA+V0EUU0bsLW2dHg9+R2TZKQzPzCxlY= github.com/maximhq/bifrost/core v1.0.1/go.mod h1:4+Ept2EnX1EEjH/mBuSwK7eE56znI/BCoCbIrx25/x8= +github.com/maximhq/bifrost/core v1.0.2 h1:GG1CGrvbz5lbdDudlJodKHx9pHr0VAoUd5lhgxUWc00= +github.com/maximhq/bifrost/core v1.0.2/go.mod h1:ZF8LVnUwVzHZ3SkCQPvXXmu0w3b4sjRLS6ij9aPYcjg= github.com/savsgio/gotils v0.0.0-20240704082632-aef3928b8a38 h1:D0vL7YNisV2yqE55+q0lFuGse6U8lxlg7fYTctlT5Gc= github.com/savsgio/gotils v0.0.0-20240704082632-aef3928b8a38/go.mod h1:sM7Mt7uEoCeFSCBM+qBrqvEo+/9vdmj19wzp3yzUhmg= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= diff --git a/transports/http/main.go b/transports/http/main.go index 023ee3b479..8af6fb3171 100644 --- a/transports/http/main.go +++ b/transports/http/main.go @@ -38,10 +38,11 @@ import ( // Command line flags var ( - initialPoolSize int // Initial size of the connection pool - port string // Port to run the server on - configPath string // Path to the config file - envPath string // Path to the .env file + initialPoolSize int // Initial size of the connection pool + dropExcessRequests bool // Drop excess requests + port string // Port to run the server on + configPath string // Path to the config file + envPath string // Path to the .env file ) // init initializes command line flags with default values. 
@@ -51,6 +52,7 @@ func init() { flag.StringVar(&port, "port", "8080", "Port to run the server on") flag.StringVar(&configPath, "config", "", "Path to the config file") flag.StringVar(&envPath, "env", "", "Path to the .env file") + flag.BoolVar(&dropExcessRequests, "drop-excess-requests", false, "Drop excess requests") flag.Parse() if configPath == "" { @@ -79,18 +81,18 @@ type ConfigMap map[schemas.ModelProvider]ProviderConfig // Returns a ConfigMap containing all provider configurations. // Panics if the config file cannot be read or parsed. // -// In the config file, use placeholder keys (e.g., env.OPEN_AI_API_KEY) instead of hardcoding actual values. +// In the config file, use placeholder keys (e.g., env.OPENAI_API_KEY) instead of hardcoding actual values. // These placeholders will be replaced with the corresponding values from the .env file. // Location of the .env file is specified by the -env flag. It // Example: // // "keys":[{ -// "value": "env.OPEN_AI_API_KEY" +// "value": "env.OPENAI_API_KEY" // "models": ["gpt-4o-mini", "gpt-4-turbo"], // "weight": 1.0 // }] // -// In this example, OPEN_AI_API_KEY refers to a key in the .env file. At runtime, its value will be used to replace the placeholder. +// In this example, OPENAI_API_KEY refers to a key in the .env file. At runtime, its value will be used to replace the placeholder. // Same setup applies to keys in meta configs of all the providers. // Example: // @@ -410,8 +412,9 @@ func main() { } client, err := bifrost.Init(schemas.BifrostConfig{ - Account: account, - InitialPoolSize: initialPoolSize, + Account: account, + InitialPoolSize: initialPoolSize, + DropExcessRequests: dropExcessRequests, }) if err != nil { log.Fatalf("failed to initialize bifrost: %v", err) diff --git a/transports/readMe.md b/transports/readMe.md deleted file mode 100644 index e69de29bb2..0000000000