@@ -17,7 +17,6 @@ import type { ApiHandlerOptions } from "../../shared/api"
 
 import { calculateApiCostOpenAI } from "../../shared/cost"
 
-import { convertToOpenAiMessages } from "../transform/openai-format"
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { getModelParams } from "../transform/model-params"
 
@@ -60,34 +59,63 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			this.options.enableGpt5ReasoningSummary = true
 		}
 		const apiKey = this.options.openAiNativeApiKey ?? "not-provided"
-
 		this.client = new OpenAI({ baseURL: this.options.openAiNativeBaseUrl, apiKey })
 	}
 
 	private normalizeUsage(usage: any, model: OpenAiNativeModel): ApiStreamUsageChunk | undefined {
 		if (!usage) return undefined
 
-		const totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0
+		// Prefer detailed shapes when available (Responses API)
+		const inputDetails = usage.input_tokens_details ?? usage.prompt_tokens_details
+
+		// Extract cached and cache-miss token counts from the details object
+		const hasCachedTokens = typeof inputDetails?.cached_tokens === "number"
+		const hasCacheMissTokens = typeof inputDetails?.cache_miss_tokens === "number"
+		const cachedFromDetails = hasCachedTokens ? inputDetails.cached_tokens : 0
+		const missFromDetails = hasCacheMissTokens ? inputDetails.cache_miss_tokens : 0
+
+		// If the total input token count is missing but details are present, derive it from them
+		let totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0
+		if (totalInputTokens === 0 && inputDetails && (cachedFromDetails > 0 || missFromDetails > 0)) {
+			totalInputTokens = cachedFromDetails + missFromDetails
+		}
+
 		const totalOutputTokens = usage.output_tokens ?? usage.completion_tokens ?? 0
+
+		// Note: missFromDetails is NOT used as a fallback for cache writes.
+		// Cache-miss tokens are input tokens that were not found in the cache,
+		// whereas cache-write tokens are tokens being written to the cache for future use.
 		const cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cache_write_tokens ?? 0
-		const cacheReadTokens = usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? 0
 
+		const cacheReadTokens =
+			usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? cachedFromDetails ?? 0
+
+		// Pass total input tokens directly to calculateApiCostOpenAI;
+		// it subtracts both cache reads and writes internally (see shared/cost.ts:46)
 		const totalCost = calculateApiCostOpenAI(
 			model.info,
 			totalInputTokens,
 			totalOutputTokens,
-			cacheWriteTokens || 0,
-			cacheReadTokens || 0,
+			cacheWriteTokens,
+			cacheReadTokens,
 		)
 
-		return {
+		const reasoningTokens =
+			typeof usage.output_tokens_details?.reasoning_tokens === "number"
+				? usage.output_tokens_details.reasoning_tokens
+				: undefined
+
+		const out: ApiStreamUsageChunk = {
 			type: "usage",
+			// Keep inputTokens as TOTAL input to preserve correct context length
 			inputTokens: totalInputTokens,
 			outputTokens: totalOutputTokens,
 			cacheWriteTokens,
 			cacheReadTokens,
+			...(typeof reasoningTokens === "number" ? { reasoningTokens } : {}),
 			totalCost,
 		}
+		return out
 	}
 
 	private resolveResponseId(responseId: string | undefined): void {
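
For context on the hunk above, a minimal standalone sketch of the new derivation rules. The deriveInputTokens helper, the payload shapes, and the sample numbers below are illustrative assumptions for this note, not code from the PR; only the fallback logic mirrors the diff.

// Illustrative sketch only: a hypothetical helper mirroring the PR's
// fallback rules for Responses API usage payloads.
interface InputTokenDetails {
	cached_tokens?: number
	cache_miss_tokens?: number
}

interface UsagePayload {
	input_tokens?: number
	output_tokens?: number
	input_tokens_details?: InputTokenDetails
}

function deriveInputTokens(usage: UsagePayload): { totalInputTokens: number; cacheReadTokens: number } {
	const details = usage.input_tokens_details
	const cached = typeof details?.cached_tokens === "number" ? details.cached_tokens : 0
	const miss = typeof details?.cache_miss_tokens === "number" ? details.cache_miss_tokens : 0

	// Same rule as the diff: derive the total from details only when the total is absent
	let total = usage.input_tokens ?? 0
	if (total === 0 && details && (cached > 0 || miss > 0)) {
		total = cached + miss
	}
	return { totalInputTokens: total, cacheReadTokens: cached }
}

// Hypothetical payload: no top-level input_tokens, but detailed cache counts
const usage: UsagePayload = {
	output_tokens: 200,
	input_tokens_details: { cached_tokens: 900, cache_miss_tokens: 100 },
}

const { totalInputTokens, cacheReadTokens } = deriveInputTokens(usage)
console.log(totalInputTokens, cacheReadTokens) // 1000 900

// Per the comment in the diff, calculateApiCostOpenAI bills only the
// non-cached remainder at the full input price (cache writes are 0 here):
const uncachedInput = totalInputTokens - cacheReadTokens // 100

Note that totalInputTokens stays the full 1000, matching the inputTokens field the hunk emits, so downstream context-length tracking is unaffected by the cache split.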