
Commit ca50ad6

fix: handle detailed token shapes in normalizeUsage for service tier pricing
- Extract cached tokens from input_tokens_details/prompt_tokens_details
- Derive total input from details when main totals are missing
- Add support for reasoning tokens from output_tokens_details
- Fix fallback chain for cache read tokens to properly use detailed shapes
1 parent c5f70bb commit ca50ad6
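
For context, the two usage shapes this normalization has to reconcile look roughly like the following. This is a sketch based on the fields referenced in the diff below; the numbers are invented, and cache_miss_tokens may not be present on every payload:

	// Hypothetical Responses API usage payload (detailed input/output shapes).
	const responsesUsage = {
		input_tokens: 1200,
		output_tokens: 350,
		input_tokens_details: { cached_tokens: 1000, cache_miss_tokens: 200 },
		output_tokens_details: { reasoning_tokens: 120 },
	}

	// Hypothetical Chat Completions usage payload (prompt/completion naming).
	const chatUsage = {
		prompt_tokens: 1200,
		completion_tokens: 350,
		prompt_tokens_details: { cached_tokens: 1000 },
	}

normalizeUsage has to accept either shape, plus older payloads where only the flat totals exist.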

1 file changed: +24 −3 lines
src/api/providers/openai-native.ts

@@ -69,10 +69,24 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	private normalizeUsage(usage: any, model: OpenAiNativeModel): ApiStreamUsageChunk | undefined {
 		if (!usage) return undefined
 
-		const totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0
+		// Extract cached tokens from detailed shapes if available
+		const cachedFromDetails =
+			usage.input_tokens_details?.cached_tokens ?? usage.prompt_tokens_details?.cached_tokens ?? 0
+
+		// Derive total input from details if main totals are missing
+		const derivedInput = usage.input_tokens_details
+			? (usage.input_tokens_details.cached_tokens ?? 0) + (usage.input_tokens_details.cache_miss_tokens ?? 0)
+			: 0
+
+		const totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? derivedInput ?? 0
 		const totalOutputTokens = usage.output_tokens ?? usage.completion_tokens ?? 0
 		const cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cache_write_tokens ?? 0
-		const cacheReadTokens = usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? 0
+		// Use detailed shapes for cache reads with proper fallback chain
+		const cacheReadTokens =
+			usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? cachedFromDetails
+
+		// Extract reasoning tokens from output details if available
+		const reasoningTokens = usage.output_tokens_details?.reasoning_tokens
 
 		// Resolve effective tier: prefer actual tier from response; otherwise requested tier
 		const effectiveTier =
@@ -87,14 +101,21 @@
 			cacheReadTokens || 0,
 		)
 
-		return {
+		const result: ApiStreamUsageChunk = {
 			type: "usage",
 			inputTokens: totalInputTokens,
 			outputTokens: totalOutputTokens,
 			cacheWriteTokens,
 			cacheReadTokens,
 			totalCost,
 		}
+
+		// Only include reasoningTokens if present
+		if (reasoningTokens !== undefined) {
+			;(result as any).reasoningTokens = reasoningTokens
+		}
+
+		return result
 	}
 
 	private resolveResponseId(responseId: string | undefined): void {
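
To see the new fallback chain in action, here is a minimal standalone sketch of the extraction logic for a payload where the flat input totals are missing entirely. The payload is invented; the extraction lines mirror the diff above:

	// Hypothetical payload: no input_tokens/prompt_tokens, only detailed shapes.
	const usage: any = {
		input_tokens_details: { cached_tokens: 1000, cache_miss_tokens: 200 },
		output_tokens: 350,
		output_tokens_details: { reasoning_tokens: 120 },
	}

	// Cached tokens come from whichever detailed shape is present.
	const cachedFromDetails =
		usage.input_tokens_details?.cached_tokens ?? usage.prompt_tokens_details?.cached_tokens ?? 0

	// With the flat totals absent, the input total is derived from the details.
	const derivedInput = usage.input_tokens_details
		? (usage.input_tokens_details.cached_tokens ?? 0) + (usage.input_tokens_details.cache_miss_tokens ?? 0)
		: 0

	const totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? derivedInput ?? 0
	const cacheReadTokens =
		usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? cachedFromDetails

	console.log({ totalInputTokens, cacheReadTokens }) // { totalInputTokens: 1200, cacheReadTokens: 1000 }

Two small observations on the diff: derivedInput always evaluates to a number, so the trailing ?? 0 in the totalInputTokens chain is purely defensive and never reached; and the explicit reasoningTokens !== undefined guard keeps the field out of the result object entirely when no output_tokens_details are reported, rather than emitting reasoningTokens: undefined.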
