@@ -17,7 +17,6 @@ import type { ApiHandlerOptions } from "../../shared/api"
 
 import { calculateApiCostOpenAI } from "../../shared/cost"
 
-import { convertToOpenAiMessages } from "../transform/openai-format"
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { getModelParams } from "../transform/model-params"
 
@@ -60,34 +59,63 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			this.options.enableGpt5ReasoningSummary = true
 		}
 		const apiKey = this.options.openAiNativeApiKey ?? "not-provided"
-
 		this.client = new OpenAI({ baseURL: this.options.openAiNativeBaseUrl, apiKey })
 	}
 
 	private normalizeUsage(usage: any, model: OpenAiNativeModel): ApiStreamUsageChunk | undefined {
 		if (!usage) return undefined
 
-		const totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0
+		// Prefer detailed shapes when available (Responses API)
+		const inputDetails = usage.input_tokens_details ?? usage.prompt_tokens_details
+
+		// Extract cached and cache-miss token counts from the details object
+		const hasCachedTokens = typeof inputDetails?.cached_tokens === "number"
+		const hasCacheMissTokens = typeof inputDetails?.cache_miss_tokens === "number"
+		const cachedFromDetails = hasCachedTokens ? inputDetails.cached_tokens : 0
+		const missFromDetails = hasCacheMissTokens ? inputDetails.cache_miss_tokens : 0
+
+		// If the total input token count is missing but details are present, derive it from them
+		let totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0
+		if (totalInputTokens === 0 && inputDetails && (cachedFromDetails > 0 || missFromDetails > 0)) {
+			totalInputTokens = cachedFromDetails + missFromDetails
+		}
+
 		const totalOutputTokens = usage.output_tokens ?? usage.completion_tokens ?? 0
+
+		// Note: missFromDetails is NOT used as a fallback for cache writes.
+		// Cache-miss tokens are input tokens that were not found in the cache,
+		// whereas cache-write tokens are tokens being written to the cache for future use.
 		const cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cache_write_tokens ?? 0
-		const cacheReadTokens = usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? 0
 
+		const cacheReadTokens =
+			usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? cachedFromDetails ?? 0
+
+		// Pass total input tokens directly to calculateApiCostOpenAI;
+		// it subtracts both cache reads and writes internally (see shared/cost.ts:46)
 		const totalCost = calculateApiCostOpenAI(
 			model.info,
 			totalInputTokens,
 			totalOutputTokens,
-			cacheWriteTokens || 0,
-			cacheReadTokens || 0,
+			cacheWriteTokens,
+			cacheReadTokens,
 		)
 
-		return {
+		const reasoningTokens =
+			typeof usage.output_tokens_details?.reasoning_tokens === "number"
+				? usage.output_tokens_details.reasoning_tokens
+				: undefined
+
+		const out: ApiStreamUsageChunk = {
 			type: "usage",
+			// Keep inputTokens as TOTAL input to preserve correct context length
 			inputTokens: totalInputTokens,
 			outputTokens: totalOutputTokens,
 			cacheWriteTokens,
 			cacheReadTokens,
+			...(typeof reasoningTokens === "number" ? { reasoningTokens } : {}),
 			totalCost,
 		}
+		return out
 	}
 
 	private resolveResponseId(responseId: string | undefined): void {
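
For context on the hunk above, a minimal standalone sketch of the new derivation rules. The deriveInputTokens helper, the payload shapes, and the sample numbers below are illustrative assumptions for this note, not code from the PR; only the fallback logic mirrors the diff.

// Illustrative sketch only: a hypothetical helper mirroring the PR's
// fallback rules for Responses API usage payloads.
interface InputTokenDetails {
	cached_tokens?: number
	cache_miss_tokens?: number
}

interface UsagePayload {
	input_tokens?: number
	output_tokens?: number
	input_tokens_details?: InputTokenDetails
}

function deriveInputTokens(usage: UsagePayload): { totalInputTokens: number; cacheReadTokens: number } {
	const details = usage.input_tokens_details
	const cached = typeof details?.cached_tokens === "number" ? details.cached_tokens : 0
	const miss = typeof details?.cache_miss_tokens === "number" ? details.cache_miss_tokens : 0

	// Same rule as the diff: derive the total from details only when the total is absent
	let total = usage.input_tokens ?? 0
	if (total === 0 && details && (cached > 0 || miss > 0)) {
		total = cached + miss
	}
	return { totalInputTokens: total, cacheReadTokens: cached }
}

// Hypothetical payload: no top-level input_tokens, but detailed cache counts
const usage: UsagePayload = {
	output_tokens: 200,
	input_tokens_details: { cached_tokens: 900, cache_miss_tokens: 100 },
}

const { totalInputTokens, cacheReadTokens } = deriveInputTokens(usage)
console.log(totalInputTokens, cacheReadTokens) // 1000 900

// Per the comment in the diff, calculateApiCostOpenAI bills only the
// non-cached remainder at the full input price (cache writes are 0 here):
const uncachedInput = totalInputTokens - cacheReadTokens // 100

Note that totalInputTokens stays the full 1000, matching the inputTokens field the hunk emits, so downstream context-length tracking is unaffected by the cache split.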