Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
cc0f6a8
feat: implement GCP Gemini request and response translation for OpenA…
sukumargaonkar Jul 2, 2025
cb12d44
refactor: remove unnecessary string manipulation for GCP model prefix
sukumargaonkar Jul 2, 2025
2088925
address pr comments
sukumargaonkar Jul 2, 2025
da5ec61
remove unnecessary nil check
sukumargaonkar Jul 2, 2025
0b2f21f
address PR comments
sukumargaonkar Jul 3, 2025
df58fe8
Merge remote-tracking branch 'upstream/main' into gcp-basic-requests
sukumargaonkar Jul 3, 2025
a10251e
add missing periods to comments
sukumargaonkar Jul 3, 2025
06821c7
Merge remote-tracking branch 'upstream/main' into gcp-basic-requests
sukumargaonkar Jul 7, 2025
03d28bb
add GCP testcase in textWithUpstream
sukumargaonkar Jul 8, 2025
4879b4e
fix local config
sukumargaonkar Jul 8, 2025
ddc602e
fix testcase
sukumargaonkar Jul 8, 2025
806a2f8
Merge remote-tracking branch 'upstream/main' into gcp-basic-requests
sukumargaonkar Jul 8, 2025
5b5419e
Merge remote-tracking branch 'upstream/main' into gcp-basic-requests
sukumargaonkar Jul 8, 2025
13e8926
handle empty gcp response
sukumargaonkar Jul 8, 2025
39656fc
address PR comment
sukumargaonkar Jul 8, 2025
5300537
address PR comment
sukumargaonkar Jul 9, 2025
1febd58
Merge remote-tracking branch 'upstream/main' into gcp-basic-requests
sukumargaonkar Jul 9, 2025
8393b25
fix test case
sukumargaonkar Jul 9, 2025
2fb7051
refactor: rename conversion functions for clarity and consistency
sukumargaonkar Jul 9, 2025
f61c4e7
Merge remote-tracking branch 'upstream/main' into gcp-basic-requests
sukumargaonkar Jul 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
478 changes: 470 additions & 8 deletions internal/extproc/translator/gemini_helper.go

Large diffs are not rendered by default.

786 changes: 786 additions & 0 deletions internal/extproc/translator/gemini_helper_test.go

Large diffs are not rendered by default.

29 changes: 4 additions & 25 deletions internal/extproc/translator/openai_awsbedrock.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,9 @@ package translator

import (
"bytes"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"regexp"
"strconv"
"strings"

Expand Down Expand Up @@ -160,25 +158,6 @@ func (o *openAIToAWSBedrockTranslatorV1ChatCompletion) openAIToolsToBedrockToolC
return nil
}

// regDataURI follows the web uri regex definition.
// https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data#syntax
var regDataURI = regexp.MustCompile(`\Adata:(.+?)?(;base64)?,`)

// parseDataURI splits a data URI such as
// data:image/jpeg;base64,/9j/4AAQSkZJRgABAgAAZABkAAD into its declared
// content type and the base64-decoded payload. It returns an error when the
// prefix does not match the data-URI grammar or the payload is not valid
// standard base64.
func parseDataURI(uri string) (string, []byte, error) {
	m := regDataURI.FindStringSubmatch(uri)
	if len(m) != 3 {
		return "", nil, fmt.Errorf("data uri does not have a valid format")
	}
	// Everything after the matched "data:<type>[;base64]," prefix is the payload.
	payload := uri[len(m[0]):]
	decoded, err := base64.StdEncoding.DecodeString(payload)
	if err != nil {
		return "", nil, err
	}
	return m[1], decoded, nil
}

// openAIMessageToBedrockMessageRoleUser converts openai user role message.
func (o *openAIToAWSBedrockTranslatorV1ChatCompletion) openAIMessageToBedrockMessageRoleUser(
openAiMessage *openai.ChatCompletionUserMessageParam, role string,
Expand Down Expand Up @@ -208,13 +187,13 @@ func (o *openAIToAWSBedrockTranslatorV1ChatCompletion) openAIMessageToBedrockMes
}
var format string
switch contentType {
case "image/png":
case mimeTypeImagePNG:
format = "png"
case "image/jpeg":
case mimeTypeImageJPEG:
format = "jpeg"
case "image/gif":
case mimeTypeImageGIF:
format = "gif"
case "image/webp":
case mimeTypeImageWEBP:
format = "webp"
default:
return nil, fmt.Errorf("unsupported image type: %s please use one of [png, jpeg, gif, webp]",
Expand Down
109 changes: 101 additions & 8 deletions internal/extproc/translator/openai_gcpvertexai.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,15 @@
package translator

import (
"encoding/json"
"fmt"
"io"
"strconv"

extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
"google.golang.org/genai"

"github.com/envoyproxy/ai-gateway/internal/apischema/gcp"
"github.com/envoyproxy/ai-gateway/internal/apischema/openai"
)

Expand All @@ -23,15 +28,21 @@ type openAIToGCPVertexAITranslatorV1ChatCompletion struct{}

// RequestBody implements [Translator.RequestBody] for GCP Gemini.
// This method translates an OpenAI ChatCompletion request to a GCP Gemini API request.
func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) RequestBody(_ []byte, openAIReq *openai.ChatCompletionRequest, onRetry bool) (
func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) RequestBody(_ []byte, openAIReq *openai.ChatCompletionRequest, _ bool) (
headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, err error,
) {
_, _ = openAIReq, onRetry
pathSuffix := buildGCPModelPathSuffix(GCPModelPublisherGoogle, openAIReq.Model, GCPMethodGenerateContent)

// TODO: Implement actual translation from OpenAI to Gemini request.
gcpReq, err := o.openAIMessageToGeminiMessage(openAIReq)
if err != nil {
return nil, nil, fmt.Errorf("error converting OpenAI request to Gemini request: %w", err)
}
gcpReqBody, err := json.Marshal(gcpReq)
if err != nil {
return nil, nil, fmt.Errorf("error marshaling Gemini request: %w", err)
}

headerMutation, bodyMutation = buildGCPRequestMutations(pathSuffix, nil)
headerMutation, bodyMutation = buildGCPRequestMutations(pathSuffix, gcpReqBody)
return headerMutation, bodyMutation, nil
}

Expand All @@ -46,10 +57,92 @@ func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) ResponseHeaders(headers

// ResponseBody implements [Translator.ResponseBody] for GCP Gemini.
// This method translates a GCP Gemini API response to the OpenAI ChatCompletion format.
func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) ResponseBody(respHeaders map[string]string, body io.Reader, endOfStream bool) (
func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) ResponseBody(respHeaders map[string]string, body io.Reader, _ bool) (
headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, err error,
) {
// TODO: Implement response body translation from GCP Gemini to OpenAI format.
_, _, _ = respHeaders, body, endOfStream
return nil, nil, LLMTokenUsage{}, nil
if statusStr, ok := respHeaders[statusHeaderName]; ok {
var status int
if status, err = strconv.Atoi(statusStr); err == nil {
if !isGoodStatusCode(status) {
// TODO: Parse GCP error response and convert to OpenAI error format.
// For now, just return error response as-is.
Comment on lines +67 to +68
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we prioritize this TODO? I think this translation is important for delivering the error response to the user.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

plan to do this in the next PR

return nil, nil, LLMTokenUsage{}, err
}
}
}

// Parse the GCP response.
var gcpResp genai.GenerateContentResponse
if err = json.NewDecoder(body).Decode(&gcpResp); err != nil {
return nil, nil, LLMTokenUsage{}, fmt.Errorf("error decoding GCP response: %w", err)
}

var openAIRespBytes []byte
// Convert to OpenAI format.
openAIResp, err := o.geminiResponseToOpenAIMessage(gcpResp)
if err != nil {
return nil, nil, LLMTokenUsage{}, fmt.Errorf("error converting GCP response to OpenAI format: %w", err)
}

// Marshal the OpenAI response.
openAIRespBytes, err = json.Marshal(openAIResp)
if err != nil {
return nil, nil, LLMTokenUsage{}, fmt.Errorf("error marshaling OpenAI response: %w", err)
}

// Update token usage if available.
var usage LLMTokenUsage
if gcpResp.UsageMetadata != nil {
usage = LLMTokenUsage{
InputTokens: uint32(gcpResp.UsageMetadata.PromptTokenCount), // nolint:gosec
OutputTokens: uint32(gcpResp.UsageMetadata.CandidatesTokenCount), // nolint:gosec
TotalTokens: uint32(gcpResp.UsageMetadata.TotalTokenCount), // nolint:gosec
}
}

headerMutation, bodyMutation = buildGCPRequestMutations("", openAIRespBytes)

return headerMutation, bodyMutation, usage, nil
}

// openAIMessageToGeminiMessage converts an OpenAI ChatCompletionRequest to a GCP Gemini GenerateContentRequest.
// openAIMessageToGeminiMessage converts an OpenAI ChatCompletionRequest to a GCP Gemini GenerateContentRequest.
func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) openAIMessageToGeminiMessage(openAIReq *openai.ChatCompletionRequest) (gcp.GenerateContentRequest, error) {
	// Translate the OpenAI message list into Gemini contents plus an
	// optional system instruction derived from system/developer messages.
	contents, sysInstruction, err := openAIMessagesToGeminiContents(openAIReq.Messages)
	if err != nil {
		return gcp.GenerateContentRequest{}, err
	}

	// Map the request-level sampling and output settings onto the Gemini
	// generation config.
	genConfig, err := openAIReqToGeminiGenerationConfig(openAIReq)
	if err != nil {
		return gcp.GenerateContentRequest{}, fmt.Errorf("error converting generation config: %w", err)
	}

	// Tools and tool configuration are not translated here yet; they are
	// left nil explicitly.
	return gcp.GenerateContentRequest{
		Contents:          contents,
		Tools:             nil,
		ToolConfig:        nil,
		GenerationConfig:  genConfig,
		SystemInstruction: sysInstruction,
	}, nil
}

// geminiResponseToOpenAIMessage maps a Gemini GenerateContentResponse onto
// the OpenAI ChatCompletion response schema.
func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) geminiResponseToOpenAIMessage(gcr genai.GenerateContentResponse) (openai.ChatCompletionResponse, error) {
	// Each Gemini candidate becomes one OpenAI choice.
	choices, err := geminiCandidatesToOpenAIChoices(gcr.Candidates)
	if err != nil {
		return openai.ChatCompletionResponse{}, fmt.Errorf("error converting choices: %w", err)
	}

	// Assemble the OpenAI-shaped response, carrying over token usage.
	resp := openai.ChatCompletionResponse{
		Object:  "chat.completion",
		Choices: choices,
		Usage:   geminiUsageToOpenAIUsage(gcr.UsageMetadata),
	}
	return resp, nil
}
Loading