Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 115 additions & 0 deletions bindings/kotlin/example/RuntimeStats.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import kotlin.system.measureNanoTime
import kotlinx.coroutines.runBlocking
import uniffi.goose_llm.*

import java.net.URI
import java.net.http.HttpClient
import java.net.http.HttpRequest
import java.net.http.HttpResponse

/* ---------- Goose helpers ---------- */

/**
 * Builds the JSON provider configuration string containing the `host` and `token` keys.
 *
 * Both values are escaped as JSON strings, so quotes, backslashes or control
 * characters in either value cannot break the document or inject extra keys.
 *
 * @param host value stored under the `"host"` key.
 * @param token value stored under the `"token"` key.
 * @return a single-line JSON object of the form `{ "host": "...", "token": "..." }`.
 */
fun buildProviderConfig(host: String, token: String): String =
    """{ "host": "${escapeJsonString(host)}", "token": "${escapeJsonString(token)}" }"""

/** Escapes [raw] so it can be placed between double quotes in a JSON document. */
private fun escapeJsonString(raw: String): String = buildString(raw.length) {
    for (ch in raw) {
        when {
            ch == '"' -> append("\\\"")
            ch == '\\' -> append("\\\\")
            ch == '\n' -> append("\\n")
            ch == '\r' -> append("\\r")
            ch == '\t' -> append("\\t")
            // Remaining control characters must be written as \uXXXX in JSON.
            ch < ' ' -> append("\\u").append(ch.code.toString(16).padStart(4, '0'))
            else -> append(ch)
        }
    }
}

/**
 * Runs one fixed-prompt completion through goose-llm and measures its wall-clock time.
 *
 * The completion request is built before the clock starts; only the
 * `completion(...)` call itself is inside the timed region.
 *
 * @param modelCfg model configuration passed to the request.
 * @param providerName provider name passed to the request.
 * @param providerCfg provider configuration string passed to the request.
 * @return the elapsed wall time in milliseconds paired with the completion response.
 */
suspend fun timeGooseCall(
    modelCfg: ModelConfig,
    providerName: String,
    providerCfg: String
): Pair<Double, CompletionResponse> {
    val prompt = "Write me a 1000 word chapter about learning Go vs Rust in the world of LLMs and AI."

    // Message timestamp is the current time in whole seconds since the epoch.
    val userMessage = Message(
        Role.USER,
        System.currentTimeMillis() / 1000,
        listOf(MessageContent.Text(TextContent(prompt)))
    )

    val request = createCompletionRequest(
        providerName,
        providerCfg,
        modelCfg,
        systemPreamble = "You are a helpful assistant.",
        messages = listOf(userMessage),
        extensions = emptyList()
    )

    // Bracket only the completion call with the monotonic clock.
    val startedAt = System.nanoTime()
    val response = completion(request)
    val elapsedNanos = System.nanoTime() - startedAt

    return Pair(elapsedNanos / 1_000_000.0, response)
}

/* ---------- OpenAI helpers ---------- */

/**
 * Sends one fixed-prompt chat completion request straight to the OpenAI HTTPS
 * endpoint and measures how long the blocking round trip takes.
 *
 * The response status is checked after timing, so that a fast error response
 * (bad key, rate limit, server error) is not silently recorded as a valid
 * latency sample.
 *
 * @param client HTTP client used to send the request.
 * @param apiKey key sent as the `Authorization: Bearer` header.
 * @return wall time of the request in milliseconds.
 * @throws IllegalStateException if the response status is not 2xx.
 */
fun timeOpenAiCall(client: HttpClient, apiKey: String): Double {
    val body = """
        {
        "model": "gpt-4.1",
        "max_tokens": 500,
        "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write me a 1000 word chapter about learning Go vs Rust in the world of LLMs and AI."}
        ]
        }
    """.trimIndent()

    val request = HttpRequest.newBuilder()
        .uri(URI.create("https://api.openai.com/v1/chat/completions"))
        .header("Authorization", "Bearer $apiKey")
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString(body))
        .build()

    // Only the send (including reading the body as a string) is timed.
    val startedAt = System.nanoTime()
    val response = client.send(request, HttpResponse.BodyHandlers.ofString())
    val wallMs = (System.nanoTime() - startedAt) / 1_000_000.0

    // A non-2xx reply is not a completed generation; refuse to report it as a timing.
    val status = response.statusCode()
    check(status in 200..299) { "OpenAI request failed with HTTP $status: ${response.body()}" }

    return wallMs
}

/* ---------- main ---------- */

/**
 * Entry point: times completions through goose-llm for each configured model,
 * then times direct OpenAI HTTPS calls, printing per-run and average figures.
 *
 * Reads DATABRICKS_HOST, DATABRICKS_TOKEN and OPENAI_API_KEY from the
 * environment and fails with an error if any of them is not set.
 */
fun main() = runBlocking {
    // Fetches an environment variable or aborts with "<NAME> not set".
    fun requireEnv(name: String): String = System.getenv(name) ?: error("$name not set")

    /* Goose provider setup */
    val providerName = "databricks"
    val databricksHost = requireEnv("DATABRICKS_HOST")
    val databricksToken = requireEnv("DATABRICKS_TOKEN")
    val providerCfg = buildProviderConfig(databricksHost, databricksToken)

    /* OpenAI setup */
    val openAiKey = requireEnv("OPENAI_API_KEY")
    val httpClient = HttpClient.newBuilder().build()

    val runsPerModel = 3
    val maxTokens = 500
    val gooseModels = listOf("goose-claude-4-sonnet", "goose-gpt-4-1")

    /* --- Goose timing --- */
    for (model in gooseModels) {
        val modelCfg = ModelConfig(model, 100_000u, 0.0f, maxTokens)
        var wallTotalMs = 0.0
        var gooseTotalMs = 0.0

        println("=== Goose: $model ===")
        for (run in 0 until runsPerModel) {
            val (wall, resp) = timeGooseCall(modelCfg, providerName, providerCfg)
            // Time reported by goose-llm itself, converted from seconds to milliseconds.
            val gooseMs = resp.runtimeMetrics.totalTimeSec * 1_000
            // Overhead is the measured wall time minus the time goose-llm reports.
            val overhead = wall - gooseMs
            wallTotalMs += wall
            gooseTotalMs += gooseMs
            println(
                "run ${run + 1}: wall = %.1f ms | goose-llm = %.1f ms | overhead = %.1f ms"
                    .format(wall, gooseMs, overhead)
            )
        }

        val avgWallMs = wallTotalMs / runsPerModel
        val avgOverheadMs = (wallTotalMs - gooseTotalMs) / runsPerModel
        println("-- avg wall = %.1f ms | avg overhead = %.1f ms --\n".format(avgWallMs, avgOverheadMs))
    }

    /* --- OpenAI direct timing --- */
    var openAiTotalMs = 0.0
    println("=== OpenAI: gpt-4.1 (direct HTTPS) ===")
    for (run in 0 until runsPerModel) {
        val wall = timeOpenAiCall(httpClient, openAiKey)
        openAiTotalMs += wall
        println("run ${run + 1}: wall = %.1f ms".format(wall))
    }
    println("-- avg wall = %.1f ms --".format(openAiTotalMs / runsPerModel))
}
Loading
Loading