Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,4 @@ storybook-static
.claude

.turbo
tsx-0/
89 changes: 89 additions & 0 deletions packages/app/server/src/providers/OpenAIAudioProvider.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import { Decimal } from '@prisma/client/runtime/library';
import { Transaction } from '../types';
import { BaseProvider } from './BaseProvider';
import { ProviderType } from './ProviderType';
import logger from '../logger';
import OpenAI from 'openai';

/**
 * Provider for OpenAI audio responses.
 *
 * Parses the upstream JSON response body and produces a Transaction whose
 * cost is a flat per-minute rate applied to the `duration` field (seconds)
 * reported in that body.
 */
export class OpenAIAudioProvider extends BaseProvider {
  /**
   * Flat price per minute of audio. Stored as a string so Decimal parses it
   * exactly instead of inheriting binary floating-point error.
   */
  private static readonly COST_PER_MINUTE = '0.006';

  private client: OpenAI;

  /**
   * @param stream - forwarded to BaseProvider.
   * @param model - forwarded to BaseProvider.
   * @throws Error when OPENAI_API_KEY is not set.
   */
  constructor(stream: boolean, model: string) {
    super(stream, model);
    const apiKey = this.getApiKey();
    if (!apiKey) {
      throw new Error('OpenAI API key is required for audio provider');
    }
    this.client = new OpenAI({ apiKey });
  }

  getType(): ProviderType {
    return ProviderType.OPENAI_AUDIO;
  }

  getBaseUrl(): string {
    return this.OPENAI_BASE_URL;
  }

  /** Reads the key from the OPENAI_API_KEY environment variable. */
  getApiKey(): string | undefined {
    return process.env.OPENAI_API_KEY;
  }

  /**
   * Builds the billing transaction for an audio response.
   *
   * @param data - raw response body; must be valid JSON.
   * @returns a 'success' Transaction priced as
   *   (duration seconds / 60) * per-minute rate, computed in Decimal.
   * @throws whatever JSON.parse throws on a non-JSON body (logged, then rethrown).
   */
  async handleBody(data: string): Promise<Transaction> {
    try {
      const payload: unknown = JSON.parse(data);
      const durationSeconds = this.extractDurationSeconds(payload);
      const durationMinutes = durationSeconds / 60;

      // Multiply before dividing and stay in Decimal throughout so the billed
      // amount carries no floating-point rounding artifacts.
      const totalCost = new Decimal(durationSeconds)
        .mul(OpenAIAudioProvider.COST_PER_MINUTE)
        .div(60);

      const transaction: Transaction = {
        metadata: {
          providerId: 'openai-audio',
          provider: 'openai',
          model: this.getModel(),
          durationSeconds,
          generateAudio: false,
        },
        rawTransactionCost: totalCost,
        status: 'success',
      };

      logger.info('Audio transcription transaction:', {
        model: this.getModel(),
        cost: totalCost.toNumber(),
        durationMinutes,
        userId: this.getUserId(),
      });

      return transaction;
    } catch (error) {
      logger.error('Error processing audio response:', error);
      throw error;
    }
  }

  /**
   * Pulls a usable duration (seconds) out of the parsed response.
   *
   * Accepts a finite, non-negative number (or a numeric string). Anything
   * else — including a missing field — yields 0 and a warning, because a zero
   * duration means the request is billed at zero cost.
   */
  private extractDurationSeconds(payload: unknown): number {
    const raw =
      typeof payload === 'object' && payload !== null
        ? (payload as { duration?: unknown }).duration
        : undefined;

    const seconds =
      typeof raw === 'string' && raw.trim() !== '' ? Number(raw) : raw;

    if (
      typeof seconds === 'number' &&
      Number.isFinite(seconds) &&
      seconds >= 0
    ) {
      return seconds;
    }

    logger.warn(
      'Audio response has no usable duration; transaction cost will be zero',
      { model: this.getModel() }
    );
    return 0;
  }

  override supportsStream(): boolean {
    // OpenAI supports streaming for audio with verbose_json response format
    // NOTE(review): handleBody JSON.parses the whole body as one document;
    // confirm streamed responses are assembled into a single JSON payload
    // before they reach it.
    return true;
  }

  /** Pass-through: the request body is returned unchanged. */
  override ensureStreamUsage(
    reqBody: Record<string, unknown>,
    reqPath: string
  ): Record<string, unknown> {
    return reqBody;
  }

  /** Pass-through: the request body is returned unchanged. */
  override transformRequestBody(
    reqBody: Record<string, unknown>,
    reqPath: string
  ): Record<string, unknown> {
    return reqBody;
  }
}
30 changes: 30 additions & 0 deletions packages/app/server/src/providers/ProviderFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {
ALL_SUPPORTED_IMAGE_MODELS,
ALL_SUPPORTED_MODELS,
ALL_SUPPORTED_VIDEO_MODELS,
ALL_SUPPORTED_AUDIO_MODELS,
} from '../services/AccountingService';
import type { EchoControlService } from '../services/EchoControlService';
import { AnthropicGPTProvider } from './AnthropicGPTProvider';
Expand All @@ -17,6 +18,7 @@ import {
PROXY_PASSTHROUGH_ONLY_MODEL as GeminiVeoProxyPassthroughOnlyModel,
} from './GeminiVeoProvider';
import { GPTProvider } from './GPTProvider';
import { OpenAIAudioProvider } from './OpenAIAudioProvider';
import { OpenAIImageProvider } from './OpenAIImageProvider';
import { OpenAIResponsesProvider } from './OpenAIResponsesProvider';
import { OpenRouterProvider } from './OpenRouterProvider';
Expand Down Expand Up @@ -91,6 +93,19 @@ const createVideoModelToProviderMapping = (): Record<string, ProviderType> => {
return mapping;
};


/**
 * Builds the audio model-id to provider-type lookup.
 *
 * Walks ALL_SUPPORTED_AUDIO_MODELS and records every entry whose provider is
 * 'OpenAI' under ProviderType.OPENAI_AUDIO; entries from any other provider
 * are left out.
 */
const createAudioModelToProviderMapping = (): Record<string, ProviderType> =>
  ALL_SUPPORTED_AUDIO_MODELS.reduce<Record<string, ProviderType>>(
    (lookup, { provider, model_id }) => {
      if (provider === 'OpenAI') {
        lookup[model_id] = ProviderType.OPENAI_AUDIO;
      }
      return lookup;
    },
    {}
  );

/**
* Model-to-provider mapping loaded from model_prices_and_context_window.json
* This replaces the previous hardcoded mapping and automatically includes all
Expand All @@ -105,6 +120,9 @@ export const IMAGE_MODEL_TO_PROVIDER: Record<string, ProviderType> =
export const VIDEO_MODEL_TO_PROVIDER: Record<string, ProviderType> =
createVideoModelToProviderMapping();

/**
 * Audio model id -> provider type lookup, computed once when this module is
 * evaluated by calling createAudioModelToProviderMapping().
 */
export const AUDIO_MODEL_TO_PROVIDER: Record<string, ProviderType> =
createAudioModelToProviderMapping();

export const getProvider = (
model: string,
stream: boolean,
Expand All @@ -123,6 +141,11 @@ export const getProvider = (
type = videoType;
}

const audioType = AUDIO_MODEL_TO_PROVIDER[model];
if (audioType) {
type = audioType;
}

if (model === GeminiVeoProxyPassthroughOnlyModel) {
type = ProviderType.GEMINI_VEO;
}
Expand All @@ -145,6 +168,11 @@ export const getProvider = (
type = ProviderType.OPENAI_IMAGES;
}

// Check if this is an audio transcription or translation endpoint
if (completionPath.includes('audio/transcriptions') || completionPath.includes('audio/translations')) {
type = ProviderType.OPENAI_AUDIO;
}

// We select for Anthropic Native if the completionPath includes "messages"
// The OpenAI Format does not hit /v1/messages, it hits /v1/chat/completions
// but the anthropic native format hits /v1/messages
Expand Down Expand Up @@ -184,6 +212,8 @@ export const getProvider = (
return new OpenAIVideoProvider(stream, model);
case ProviderType.GROQ:
return new GroqProvider(stream, model);
case ProviderType.OPENAI_AUDIO:
return new OpenAIAudioProvider(stream, model);
default:
throw new Error(`Unknown provider type: ${type}`);
}
Expand Down
1 change: 1 addition & 0 deletions packages/app/server/src/providers/ProviderType.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ export enum ProviderType {
OPENAI_IMAGES = 'OPENAI_IMAGES',
OPENAI_VIDEOS = 'OPENAI_VIDEOS',
GROQ = 'GROQ',
OPENAI_AUDIO = 'OPENAI_AUDIO',
}
16 changes: 16 additions & 0 deletions packages/app/server/src/services/AccountingService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@ import {
OpenRouterModels,
GroqModels,
OpenAIImageModels,
OpenAIAudioModels,
SupportedOpenAIResponseToolPricing,
SupportedModel,
SupportedImageModel,
SupportedVideoModel,
SupportedAudioModel,
} from '@merit-systems/echo-typescript-sdk';

import { Decimal } from '@prisma/client/runtime/library';
Expand Down Expand Up @@ -40,6 +42,10 @@ export const ALL_SUPPORTED_VIDEO_MODELS: SupportedVideoModel[] = [
...OpenAIVideoModels,
];

// Combined list of supported audio models. Currently it contains only the
// OpenAI audio models imported from the SDK.
export const ALL_SUPPORTED_AUDIO_MODELS: SupportedAudioModel[] = [
...OpenAIAudioModels,
];

// Create a lookup map for O(1) model price retrieval
const MODEL_PRICE_MAP = new Map<string, SupportedModel>();
ALL_SUPPORTED_MODELS.forEach(model => {
Expand All @@ -58,6 +64,12 @@ ALL_SUPPORTED_VIDEO_MODELS.forEach(model => {
VIDEO_MODEL_MAP.set(model.model_id, model);
});

// Audio models indexed by model_id; kept separate from the other model maps.
const AUDIO_MODEL_MAP = new Map<string, SupportedAudioModel>(
  ALL_SUPPORTED_AUDIO_MODELS.map(
    (audioModel): [string, SupportedAudioModel] => [
      audioModel.model_id,
      audioModel,
    ]
  )
);

export const getModelPrice = (model: string) => {
const supportedModel = MODEL_PRICE_MAP.get(model);

Expand Down Expand Up @@ -112,6 +124,10 @@ export const isValidVideoModel = (model: string) => {
return VIDEO_MODEL_MAP.has(model);
};

/**
 * Reports whether `model` is a known audio model.
 *
 * @param model - model id to look up.
 * @returns true when AUDIO_MODEL_MAP has an entry for `model`.
 */
export const isValidAudioModel = (model: string): boolean =>
  AUDIO_MODEL_MAP.has(model);

export const getCostPerToken = (
model: string,
inputTokens: number,
Expand Down
2 changes: 1 addition & 1 deletion packages/app/server/src/services/EchoControlService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ export class EchoControlService {

constructor(db: PrismaClient, apiKey: string) {
// Check if the generated Prisma client exists
const generatedPrismaPath = join(__dirname, 'generated', 'prisma');
const generatedPrismaPath = join(__dirname, '..', 'generated', 'prisma');
if (!existsSync(generatedPrismaPath)) {
throw new Error(
`Generated Prisma client not found at ${generatedPrismaPath}. ` +
Expand Down
8 changes: 6 additions & 2 deletions packages/app/server/src/services/PricingService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
getVideoModelPrice,
isValidImageModel,
isValidVideoModel,
isValidAudioModel,
calculateToolCost,
getImageModelPrice,
} from './AccountingService';
Expand All @@ -20,8 +21,11 @@ export function getRequestMaxCost(
provider: BaseProvider,
isPassthroughProxyRoute: boolean
): Decimal {
// Need to switch between language/image/video for different pricing models.
if (isValidVideoModel(provider.getModel())) {
if (isValidAudioModel(provider.getModel())) {
const fileSizeBytes = Number(req.originalContentLength) || 1024 * 1024;
const estimatedMinutes = Math.max(1, fileSizeBytes / (1024 * 1024));
return new Decimal(estimatedMinutes * 0.006);
} else if (isValidVideoModel(provider.getModel())) {
const videoModelWithPricing = getVideoModelPrice(provider.getModel());
if (!videoModelWithPricing) {
throw new UnknownModelError(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
isValidImageModel,
isValidModel,
isValidVideoModel,
isValidAudioModel,
} from './AccountingService';
import { extractIsStream, extractModelName } from './RequestDataService';

Expand Down Expand Up @@ -67,7 +68,8 @@ export async function initializeProvider(
!model ||
(!isValidModel(model) &&
!isValidImageModel(model) &&
!isValidVideoModel(model))
!isValidVideoModel(model) &&
!isValidAudioModel(model))
) {
logger.warn(`Invalid model: ${model}`);
// if auth or x402 header, return 422
Expand Down
Loading