Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,4 @@ storybook-static
.claude

.turbo
tsx-0/
93 changes: 93 additions & 0 deletions packages/app/server/src/providers/OpenAIAudioProvider.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import { Decimal } from '@prisma/client/runtime/library';
import { Transaction } from '../types';
import { BaseProvider } from './BaseProvider';
import { ProviderType } from './ProviderType';
import logger from '../logger';
import OpenAI from 'openai';

/**
 * Provider for OpenAI audio requests (selected as `ProviderType.OPENAI_AUDIO`).
 *
 * Forces the upstream response into `verbose_json` so the response body
 * carries a `duration` field, then prices the request at a flat per-minute
 * rate based on that duration.
 */
export class OpenAIAudioProvider extends BaseProvider {
  /**
   * Flat rate charged per minute of audio.
   * NOTE(review): presumably USD and mirroring the upstream list price —
   * confirm the currency/unit against the accounting layer.
   * Kept as a string-constructed Decimal so the rate is represented exactly.
   */
  private static readonly COST_PER_MINUTE = new Decimal('0.006');

  /** Number of seconds in a minute, used to convert the reported duration. */
  private static readonly SECONDS_PER_MINUTE = 60;

  private readonly client: OpenAI;

  /**
   * @param stream - whether the caller requested a streamed response
   * @param model - model identifier for this request
   * @throws Error when no OpenAI API key is configured
   */
  constructor(stream: boolean, model: string) {
    super(stream, model);
    const apiKey = this.getApiKey();
    if (!apiKey) {
      throw new Error('OpenAI API key is required for audio provider');
    }
    this.client = new OpenAI({ apiKey });
  }

  /** @returns the provider type tag for this provider. */
  getType(): ProviderType {
    return ProviderType.OPENAI_AUDIO;
  }

  /** @returns the OpenAI base URL defined on the base provider. */
  getBaseUrl(): string {
    return this.OPENAI_BASE_URL;
  }

  /** @returns the OpenAI API key from `OPENAI_API_KEY`, if set. */
  getApiKey(): string | undefined {
    return process.env.OPENAI_API_KEY;
  }

  /**
   * Parses the upstream response body and builds the billing transaction.
   *
   * The cost is `duration (s) / 60 * COST_PER_MINUTE`, computed entirely in
   * Decimal arithmetic so no binary floating-point error reaches the billed
   * amount. A missing, non-numeric, non-finite, or negative `duration` is
   * treated as zero seconds and logged as a warning, because it means the
   * request is being recorded at zero cost.
   *
   * @param data - raw response body; must be a JSON document
   * @returns the transaction describing this request
   * @throws whatever `JSON.parse` throws on malformed input (logged, then rethrown)
   */
  async handleBody(data: string): Promise<Transaction> {
    try {
      const audioResponse = JSON.parse(data) as {
        text?: string;
        duration?: unknown;
        language?: string;
      };

      // Only trust a finite, non-negative number; anything else would turn
      // into a NaN or negative cost further down.
      const reportedDuration = audioResponse.duration;
      const hasUsableDuration =
        typeof reportedDuration === 'number' &&
        Number.isFinite(reportedDuration) &&
        reportedDuration >= 0;
      const durationSeconds = hasUsableDuration
        ? (reportedDuration as number)
        : 0;

      if (!hasUsableDuration) {
        logger.warn(
          'Audio response has no usable duration; recording zero cost',
          { model: this.getModel() }
        );
      }

      const totalCost = new Decimal(durationSeconds)
        .div(OpenAIAudioProvider.SECONDS_PER_MINUTE)
        .mul(OpenAIAudioProvider.COST_PER_MINUTE);

      const transaction: Transaction = {
        metadata: {
          providerId: 'openai-audio',
          provider: 'openai',
          model: this.getModel(),
          durationSeconds,
          generateAudio: false,
        },
        rawTransactionCost: totalCost,
        status: 'success',
      };

      logger.info('Audio transcription transaction:', {
        model: this.getModel(),
        cost: totalCost.toNumber(),
        durationMinutes:
          durationSeconds / OpenAIAudioProvider.SECONDS_PER_MINUTE,
        userId: this.getUserId(),
      });

      return transaction;
    } catch (error) {
      logger.error('Error processing audio response:', error);
      throw error;
    }
  }

  /**
   * Reports that this provider accepts streamed requests.
   *
   * NOTE(review): `handleBody` JSON-parses the whole body in one go — confirm
   * that streamed responses reach it in that shape, and that every supported
   * audio model accepts both streaming and `verbose_json`.
   */
  override supportsStream(): boolean {
    // OpenAI supports streaming for audio with verbose_json response format
    return true;
  }

  /**
   * No stream-usage options are injected for audio requests; the body is
   * returned unchanged.
   */
  override ensureStreamUsage(
    reqBody: Record<string, unknown>,
    reqPath: string
  ): Record<string, unknown> {
    return reqBody;
  }

  /**
   * Returns a copy of the request body with `response_format` overridden.
   *
   * @param reqBody - the incoming request body (not mutated)
   * @param reqPath - request path; not consulted by this provider
   */
  override transformRequestBody(
    reqBody: Record<string, unknown>,
    reqPath: string
  ): Record<string, unknown> {
    // Force verbose_json format to get duration field for cost calculation
    return {
      ...reqBody,
      response_format: 'verbose_json',
    };
  }
}
30 changes: 30 additions & 0 deletions packages/app/server/src/providers/ProviderFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {
ALL_SUPPORTED_IMAGE_MODELS,
ALL_SUPPORTED_MODELS,
ALL_SUPPORTED_VIDEO_MODELS,
ALL_SUPPORTED_AUDIO_MODELS,
} from '../services/AccountingService';
import type { EchoControlService } from '../services/EchoControlService';
import { AnthropicGPTProvider } from './AnthropicGPTProvider';
Expand All @@ -17,6 +18,7 @@ import {
PROXY_PASSTHROUGH_ONLY_MODEL as GeminiVeoProxyPassthroughOnlyModel,
} from './GeminiVeoProvider';
import { GPTProvider } from './GPTProvider';
import { OpenAIAudioProvider } from './OpenAIAudioProvider';
import { OpenAIImageProvider } from './OpenAIImageProvider';
import { OpenAIResponsesProvider } from './OpenAIResponsesProvider';
import { OpenRouterProvider } from './OpenRouterProvider';
Expand Down Expand Up @@ -97,6 +99,19 @@ const createVideoModelToProviderMapping = (): Record<string, ProviderType> => {
return mapping;
};


/**
 * Builds the audio model-id → provider-type lookup from
 * ALL_SUPPORTED_AUDIO_MODELS. Only entries whose provider is 'OpenAI' are
 * included, and each maps to ProviderType.OPENAI_AUDIO.
 */
const createAudioModelToProviderMapping = (): Record<string, ProviderType> =>
  ALL_SUPPORTED_AUDIO_MODELS.reduce<Record<string, ProviderType>>(
    (providerByModelId, { provider, model_id }) => {
      if (provider === 'OpenAI') {
        providerByModelId[model_id] = ProviderType.OPENAI_AUDIO;
      }
      return providerByModelId;
    },
    {}
  );

/**
* Model-to-provider mapping loaded from model_prices_and_context_window.json
* This replaces the previous hardcoded mapping and automatically includes all
Expand All @@ -111,6 +126,9 @@ export const IMAGE_MODEL_TO_PROVIDER: Record<string, ProviderType> =
export const VIDEO_MODEL_TO_PROVIDER: Record<string, ProviderType> =
createVideoModelToProviderMapping();

/**
 * Audio model id → provider type lookup, built once at module load by
 * createAudioModelToProviderMapping().
 */
export const AUDIO_MODEL_TO_PROVIDER: Record<string, ProviderType> =
createAudioModelToProviderMapping();

export const getProvider = (
model: string,
stream: boolean,
Expand All @@ -129,6 +147,11 @@ export const getProvider = (
type = videoType;
}

const audioType = AUDIO_MODEL_TO_PROVIDER[model];
if (audioType) {
type = audioType;
}

if (model === GeminiVeoProxyPassthroughOnlyModel) {
type = ProviderType.GEMINI_VEO;
}
Expand All @@ -151,6 +174,11 @@ export const getProvider = (
type = ProviderType.OPENAI_IMAGES;
}

// Check if this is an audio transcription or translation endpoint
if (completionPath.includes('audio/transcriptions') || completionPath.includes('audio/translations')) {
type = ProviderType.OPENAI_AUDIO;
}

// We select for Anthropic Native if the completionPath includes "messages"
// The OpenAI Format does not hit /v1/messages, it hits /v1/chat/completions
// but the anthropic native format hits /v1/messages
Expand Down Expand Up @@ -190,6 +218,8 @@ export const getProvider = (
return new OpenAIVideoProvider(stream, model);
case ProviderType.GROQ:
return new GroqProvider(stream, model);
case ProviderType.OPENAI_AUDIO:
return new OpenAIAudioProvider(stream, model);
case ProviderType.XAI:
return new XAIProvider(stream, model);
default:
Expand Down
1 change: 1 addition & 0 deletions packages/app/server/src/providers/ProviderType.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@ export enum ProviderType {
OPENAI_IMAGES = 'OPENAI_IMAGES',
OPENAI_VIDEOS = 'OPENAI_VIDEOS',
GROQ = 'GROQ',
OPENAI_AUDIO = 'OPENAI_AUDIO',
XAI = 'XAI',
}
16 changes: 16 additions & 0 deletions packages/app/server/src/services/AccountingService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@ import {
OpenRouterModels,
GroqModels,
OpenAIImageModels,
OpenAIAudioModels,
SupportedOpenAIResponseToolPricing,
SupportedModel,
SupportedImageModel,
SupportedVideoModel,
SupportedAudioModel,
XAIModels,
} from '@merit-systems/echo-typescript-sdk';

Expand Down Expand Up @@ -42,6 +44,10 @@ export const ALL_SUPPORTED_VIDEO_MODELS: SupportedVideoModel[] = [
...OpenAIVideoModels,
];

/**
 * Every audio model this service recognizes. Currently this is exactly the
 * OpenAIAudioModels list imported from the SDK.
 */
export const ALL_SUPPORTED_AUDIO_MODELS: SupportedAudioModel[] = [
...OpenAIAudioModels,
];

// Create a lookup map for O(1) model price retrieval
const MODEL_PRICE_MAP = new Map<string, SupportedModel>();
ALL_SUPPORTED_MODELS.forEach(model => {
Expand All @@ -60,6 +66,12 @@ ALL_SUPPORTED_VIDEO_MODELS.forEach(model => {
VIDEO_MODEL_MAP.set(model.model_id, model);
});

// Index the supported audio models by model_id so lookups do not have to
// scan the list.
const AUDIO_MODEL_MAP = new Map<string, SupportedAudioModel>();
for (const audioModel of ALL_SUPPORTED_AUDIO_MODELS) {
  AUDIO_MODEL_MAP.set(audioModel.model_id, audioModel);
}

export const getModelPrice = (model: string) => {
const supportedModel = MODEL_PRICE_MAP.get(model);

Expand Down Expand Up @@ -114,6 +126,10 @@ export const isValidVideoModel = (model: string) => {
return VIDEO_MODEL_MAP.has(model);
};

/**
 * Reports whether `model` is the id of a supported audio model.
 *
 * @param model - model identifier to look up
 * @returns true when the id is a key of AUDIO_MODEL_MAP
 */
export const isValidAudioModel = (model: string) =>
  AUDIO_MODEL_MAP.has(model);

export const getCostPerToken = (
model: string,
inputTokens: number,
Expand Down
2 changes: 1 addition & 1 deletion packages/app/server/src/services/EchoControlService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ export class EchoControlService {

constructor(db: PrismaClient, apiKey: string) {
// Check if the generated Prisma client exists
const generatedPrismaPath = join(__dirname, 'generated', 'prisma');
const generatedPrismaPath = join(__dirname, '..', 'generated', 'prisma');
if (!existsSync(generatedPrismaPath)) {
throw new Error(
`Generated Prisma client not found at ${generatedPrismaPath}. ` +
Expand Down
8 changes: 6 additions & 2 deletions packages/app/server/src/services/PricingService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
getVideoModelPrice,
isValidImageModel,
isValidVideoModel,
isValidAudioModel,
calculateToolCost,
getImageModelPrice,
} from './AccountingService';
Expand All @@ -26,8 +27,11 @@ export function getRequestMaxCost(
provider: BaseProvider,
isPassthroughProxyRoute: boolean
): Decimal {
// Need to switch between language/image/video for different pricing models.
if (isValidVideoModel(provider.getModel())) {
if (isValidAudioModel(provider.getModel())) {
const fileSizeBytes = Number(req.originalContentLength) || 1024 * 1024;
const estimatedMinutes = Math.max(1, fileSizeBytes / (1024 * 1024));
return new Decimal(estimatedMinutes * 0.006);
} else if (isValidVideoModel(provider.getModel())) {
const videoModelWithPricing = getVideoModelPrice(provider.getModel());
if (!videoModelWithPricing) {
throw new UnknownModelError(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
isValidImageModel,
isValidModel,
isValidVideoModel,
isValidAudioModel,
} from './AccountingService';
import { extractIsStream, extractModelName } from './RequestDataService';

Expand Down Expand Up @@ -67,7 +68,8 @@ export async function initializeProvider(
!model ||
(!isValidModel(model) &&
!isValidImageModel(model) &&
!isValidVideoModel(model))
!isValidVideoModel(model) &&
!isValidAudioModel(model))
) {
logger.warn(`Invalid model: ${model}`);
// if auth or x402 header, return 422
Expand Down
Loading