From 33509fb86258661af9f0cdb6bf7c14d0301c1f9f Mon Sep 17 00:00:00 2001 From: strawgate Date: Mon, 27 Apr 2026 23:41:30 -0500 Subject: [PATCH 01/11] feat(logsdb): comprehensive benchmark suite + interactive demo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add cross-corpus benchmarks for storage efficiency, query latency, and ingest throughput across 5 synthetic workload types (syslog, structured JSON, high-cardinality, cloud-native, mixed) at 1K/10K/100K scale. Key findings from initial run: - Syslog: 14.2 B/log (5.1× vs text, 14.9× vs ndjson) — excellent - Cloud-native: 21.1 B/log (4.4× vs text, 16.1× vs ndjson) — good - Mixed: 38.9 B/log (4.0× vs text, 9.5× vs ndjson) - Structured JSON: 47.7 B/log (6.3× vs text, 10.5× vs ndjson) - High-cardinality: 66.1 B/log (1.3× vs text, 7.7× vs ndjson) Ingest throughput: 4K-25K records/s depending on workload complexity. Query pruning working well: time_range queries prune 80-90% of chunks. Also includes: - Interactive logsdb-engine demo with Storage Explorer, Logs Explorer, and Query Builder panels using the real o11ylogsdb engine - 32 demo tests (data-gen, storage-model, query-model) - Browser compatibility fix for query.ts (performance.now fallback) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../bench/comprehensive-query.bench.ts | 236 ++++++++ .../bench/comprehensive-storage.bench.ts | 173 ++++++ .../bench/ingest-throughput.bench.ts | 164 ++++++ packages/o11ylogsdb/bench/run.mjs | 3 + .../o11ylogsdb/bench/synthetic-corpora.ts | 365 +++++++++++++ packages/o11ylogsdb/src/query.ts | 3 + site/logsdb-engine/css/base.css | 17 + site/logsdb-engine/css/demo.css | 427 +++++++++++++++ site/logsdb-engine/index.html | 145 +++++ site/logsdb-engine/js/app.js | 508 +++++++++++++++++ site/logsdb-engine/js/data-gen.js | 511 ++++++++++++++++++ site/logsdb-engine/js/logs-model.js | 171 ++++++ site/logsdb-engine/js/query-model.js | 204 +++++++ site/logsdb-engine/js/storage-model.js | 181 +++++++ site/logsdb-engine/js/zlib-stub.js | 17 + site/logsdb-engine/test/data-gen.test.js | 107 ++++ site/logsdb-engine/test/query-model.test.js | 176 ++++++ site/logsdb-engine/test/storage-model.test.js | 80 +++ site/logsdb-engine/vite.config.ts | 26 + 19 files changed, 3514 insertions(+) create mode 100644 packages/o11ylogsdb/bench/comprehensive-query.bench.ts create mode 100644 packages/o11ylogsdb/bench/comprehensive-storage.bench.ts create mode 100644 packages/o11ylogsdb/bench/ingest-throughput.bench.ts create mode 100644 packages/o11ylogsdb/bench/synthetic-corpora.ts create mode 100644 site/logsdb-engine/css/base.css create mode 100644 site/logsdb-engine/css/demo.css create mode 100644 site/logsdb-engine/index.html create mode 100644 site/logsdb-engine/js/app.js create mode 100644 site/logsdb-engine/js/data-gen.js create mode 100644 site/logsdb-engine/js/logs-model.js create mode 100644 site/logsdb-engine/js/query-model.js create mode 100644 site/logsdb-engine/js/storage-model.js create mode 100644 site/logsdb-engine/js/zlib-stub.js create mode 100644 site/logsdb-engine/test/data-gen.test.js create mode 100644 site/logsdb-engine/test/query-model.test.js create mode 100644 site/logsdb-engine/test/storage-model.test.js create mode 100644 site/logsdb-engine/vite.config.ts diff --git a/packages/o11ylogsdb/bench/comprehensive-query.bench.ts b/packages/o11ylogsdb/bench/comprehensive-query.bench.ts new file mode 100644 index 00000000..29ab4f1f --- /dev/null +++ b/packages/o11ylogsdb/bench/comprehensive-query.bench.ts @@ -0,0 +1,236 @@ +/** + * comprehensive-query.bench.ts — Cross-corpus query performance benchmark. + * + * Tests common query patterns against all synthetic corpus types at 10K scale: + * 1. Time range (first 10%, last 10%) + * 2. Severity filter (ERROR+, WARN+) + * 3. Body substring search + * 4. Resource/service filter + * 5. Combined predicates (time + severity + body) + * 6. Full scan (no predicates) + * + * Reports query latency (p50/p99), records scanned, records emitted, + * and pruning effectiveness (chunks pruned / total). + */ + +import { + defaultRegistry, + GzipCodec, + type InstrumentationScope, + type LogRecord, + LogStore, + query, + type QuerySpec, + type Resource, + TypedColumnarDrainPolicy, + ZstdCodec, +} from "../dist/index.js"; +import { + CORPUS_GENERATORS, + type SyntheticCorpusType, +} from "./synthetic-corpora.js"; +import { nowMillis } from "./harness.js"; +import { buildProfileReport, type ProfileResult, profileEncode } from "./profile-harness.js"; + +const SCOPE: InstrumentationScope = { name: "bench-query", version: "0.0.0" }; +const RECORD_COUNT = 10_000; + +function buildResource(corpusType: string): Resource { + return { + attributes: [ + { key: "service.name", value: `bench-${corpusType}` }, + { key: "corpus.type", value: corpusType }, + ], + }; +} + +function buildStore(corpusType: SyntheticCorpusType): LogStore { + const store = new LogStore({ + registry: defaultRegistry() + .register(new GzipCodec(6)) + .register(new ZstdCodec(3)) + .register(new ZstdCodec(9)) + .register(new ZstdCodec(19)), + policy: new TypedColumnarDrainPolicy({ bodyCodec: "zstd-19" }), + rowsPerChunk: 1024, + }); + + const records = CORPUS_GENERATORS[corpusType](RECORD_COUNT); + const resource = buildResource(corpusType); + for (const record of records) { + store.append(resource, SCOPE, record); + } + store.flush(); + return store; +} + +interface QueryCase { + name: string; + description: string; + spec: (records: LogRecord[]) => QuerySpec; +} + +const QUERY_CASES: QueryCase[] = [ + { + name: "full_scan", + description: "No predicates — full decode + emit", + spec: () => ({}), + }, + { + name: "severity_warn+", + description: "Severity >= WARN (13)", + spec: () => ({ severityGte: 13 }), + }, + { + name: "severity_error+", + description: "Severity >= ERROR (17)", + spec: () => ({ severityGte: 17 }), + }, + { + name: "time_first_10pct", + description: "First 10% of time range", + spec: (records) => { + const first = records[0]!.timeUnixNano; + const last = records[records.length - 1]!.timeUnixNano; + const range = last - first; + return { range: { from: first, to: first + range / 10n } }; + }, + }, + { + name: "time_last_10pct", + description: "Last 10% of time range", + spec: (records) => { + const first = records[0]!.timeUnixNano; + const last = records[records.length - 1]!.timeUnixNano; + const range = last - first; + return { range: { from: last - range / 10n, to: last } }; + }, + }, + { + name: "service_match", + description: "Resource service.name match", + spec: () => ({ + resourceEquals: { "service.name": "bench-syslog" }, + }), + }, + { + name: "service_no_match", + description: "Resource service.name no match", + spec: () => ({ + resourceEquals: { "service.name": "_does_not_exist_" }, + }), + }, + { + name: "body_substring", + description: "Body contains common keyword", + spec: () => ({ bodyContains: "error" }), + }, + { + name: "combined_time+severity", + description: "Time range + severity filter", + spec: (records) => { + const first = records[0]!.timeUnixNano; + const last = records[records.length - 1]!.timeUnixNano; + const range = last - first; + return { + severityGte: 17, + range: { from: first, to: first + range / 2n }, + }; + }, + }, + { + name: "combined_all", + description: "Time + severity + service + body", + spec: (records) => { + const first = records[0]!.timeUnixNano; + const last = records[records.length - 1]!.timeUnixNano; + const range = last - first; + return { + severityGte: 13, + range: { from: first, to: first + range / 2n }, + resourceEquals: { "corpus.type": "syslog" }, + bodyContains: "ssh", + }; + }, + }, +]; + +const CORPUS_TYPES: SyntheticCorpusType[] = [ + "syslog", + "structured", + "high-cardinality", + "cloud-native", + "mixed", +]; + +function runQueryCase( + corpusType: SyntheticCorpusType, + store: LogStore, + records: LogRecord[], + qCase: QueryCase +): ProfileResult { + const spec = qCase.spec(records); + const totalChunkBytes = store.stats().totalChunkBytes; + + // Warm up + sample to get record count + const sample = query(store, spec); + process.stderr.write( + ` ${qCase.name.padEnd(24)} emitted=${String(sample.records.length).padStart(6)} ` + + `scanned=${sample.stats.chunksScanned} pruned=${sample.stats.chunksPruned}\n` + ); + + // Estimate raw sizes for ratio fields + const rawTextBytes = records.reduce((s, r) => { + const body = typeof r.body === "string" ? r.body : JSON.stringify(r.body); + return s + body.length; + }, 0); + + return profileEncode({ + corpus: corpusType, + codec: qCase.name, + inputBytes: totalChunkBytes, + rawTextBytes, + rawNdjsonBytes: rawTextBytes * 2, // rough proxy + logCount: sample.records.length || 1, + encode: () => { + const r = query(store, spec); + return r.records.length; + }, + options: { warmup: 2, iterations: 5 }, + }); +} + +export default async function run() { + process.stderr.write("\n═══ Comprehensive Query Benchmark (10K records per corpus) ═══\n\n"); + const results: ProfileResult[] = []; + + for (const corpusType of CORPUS_TYPES) { + process.stderr.write(` ─── ${corpusType} ───\n`); + const records = CORPUS_GENERATORS[corpusType](RECORD_COUNT); + const store = buildStore(corpusType); + + for (const qCase of QUERY_CASES) { + results.push(runQueryCase(corpusType, store, records, qCase)); + } + process.stderr.write("\n"); + } + + // Summary + process.stderr.write("─── Query latency summary (p50 ms) ───\n"); + process.stderr.write(" " + "query".padEnd(24)); + for (const ct of CORPUS_TYPES) process.stderr.write(ct.padEnd(16)); + process.stderr.write("\n"); + + for (const qCase of QUERY_CASES) { + process.stderr.write(" " + qCase.name.padEnd(24)); + for (const ct of CORPUS_TYPES) { + const r = results.find((x) => x.corpus === ct && x.codec === qCase.name); + const val = r ? r.timing.p50.toFixed(1) : "—"; + process.stderr.write(val.padEnd(16)); + } + process.stderr.write("\n"); + } + process.stderr.write("\n"); + + return buildProfileReport("comprehensive-query", results); +} diff --git a/packages/o11ylogsdb/bench/comprehensive-storage.bench.ts b/packages/o11ylogsdb/bench/comprehensive-storage.bench.ts new file mode 100644 index 00000000..99877ec2 --- /dev/null +++ b/packages/o11ylogsdb/bench/comprehensive-storage.bench.ts @@ -0,0 +1,173 @@ +/** + * comprehensive-storage.bench.ts — Cross-corpus storage efficiency benchmark. + * + * Tests all synthetic corpus types (syslog, structured JSON, high-cardinality, + * cloud-native, mixed) at multiple sizes through the full TypedColumnarDrainPolicy + * engine stack. Reports B/log, compression ratio, and ingest throughput. + * + * This is the merge gate benchmark for storage efficiency across workloads. + */ + +import { + defaultRegistry, + GzipCodec, + type InstrumentationScope, + type LogRecord, + LogStore, + type Resource, + TypedColumnarDrainPolicy, + ZstdCodec, +} from "../dist/index.js"; +import { + CORPUS_GENERATORS, + CORPUS_SIZES, + type CorpusSize, + type SyntheticCorpusType, +} from "./synthetic-corpora.js"; +import { + buildReport, + bytesPerLog, + type CompressionResult, + nowMillis, + ratio as ratioFn, +} from "./harness.js"; + +const SCOPE: InstrumentationScope = { name: "bench-comprehensive", version: "0.0.0" }; + +function buildResource(corpusType: string): Resource { + return { + attributes: [ + { key: "service.name", value: `bench-${corpusType}` }, + { key: "corpus.type", value: corpusType }, + ], + }; +} + +interface BenchCase { + corpusType: SyntheticCorpusType; + size: CorpusSize; +} + +// Run 10K for all types (fast), 100K for key types, skip 1M for CI speed +const CASES: BenchCase[] = [ + { corpusType: "syslog", size: "1k" }, + { corpusType: "syslog", size: "10k" }, + { corpusType: "syslog", size: "100k" }, + { corpusType: "structured", size: "1k" }, + { corpusType: "structured", size: "10k" }, + { corpusType: "structured", size: "100k" }, + { corpusType: "high-cardinality", size: "1k" }, + { corpusType: "high-cardinality", size: "10k" }, + { corpusType: "high-cardinality", size: "100k" }, + { corpusType: "cloud-native", size: "1k" }, + { corpusType: "cloud-native", size: "10k" }, + { corpusType: "cloud-native", size: "100k" }, + { corpusType: "mixed", size: "1k" }, + { corpusType: "mixed", size: "10k" }, + { corpusType: "mixed", size: "100k" }, +]; + +function measureRawSize(records: LogRecord[]): number { + // Raw NDJSON size: what you'd get without any compression + // Custom replacer handles BigInt fields + let total = 0; + const replacer = (_k: string, v: unknown) => + typeof v === "bigint" ? v.toString() : v; + for (const r of records) { + total += JSON.stringify(r, replacer).length + 1; // +1 for newline + } + return total; +} + +function measureRawTextSize(records: LogRecord[]): number { + let total = 0; + for (const r of records) { + const body = typeof r.body === "string" ? r.body : JSON.stringify(r.body); + total += body.length + 1; + } + return total; +} + +function runCase(c: BenchCase): CompressionResult { + const count = CORPUS_SIZES[c.size]; + const generator = CORPUS_GENERATORS[c.corpusType]; + const label = `${c.corpusType}/${c.size}`; + + process.stderr.write(` Generating ${label} (${count.toLocaleString()} records)… `); + const t0 = nowMillis(); + const records = generator(count); + const genMs = nowMillis() - t0; + process.stderr.write(`${genMs.toFixed(0)}ms\n`); + + const rawNdjsonBytes = measureRawSize(records); + const rawTextBytes = measureRawTextSize(records); + + // Ingest into LogStore with TypedColumnarDrainPolicy + ZSTD-19 + const store = new LogStore({ + registry: defaultRegistry() + .register(new GzipCodec(6)) + .register(new ZstdCodec(3)) + .register(new ZstdCodec(9)) + .register(new ZstdCodec(19)), + policy: new TypedColumnarDrainPolicy({ bodyCodec: "zstd-19" }), + rowsPerChunk: 1024, + }); + + const resource = buildResource(c.corpusType); + const ingestStart = nowMillis(); + for (const record of records) { + store.append(resource, SCOPE, record); + } + store.flush(); + const ingestMs = nowMillis() - ingestStart; + + const stats = store.stats(); + const outputBytes = stats.totalChunkBytes; + const bpl = bytesPerLog(outputBytes, count); + const rvr = ratioFn(rawTextBytes, outputBytes); + const rvn = ratioFn(rawNdjsonBytes, outputBytes); + + process.stderr.write( + ` → ${bpl.toFixed(2)} B/log | ` + + `${rvr.toFixed(1)}× vs text | ` + + `${rvn.toFixed(1)}× vs ndjson | ` + + `${(count / (ingestMs / 1000)).toFixed(0)} records/s | ` + + `${stats.chunks} chunks\n` + ); + + return { + corpus: label, + codec: "typed-columnar-zstd19", + inputBytes: rawNdjsonBytes, + outputBytes, + logCount: count, + bytesPerLog: bpl, + ratioVsRaw: rvr, + ratioVsNdjson: rvn, + encodeMillis: ingestMs, + }; +} + +export default async function run() { + process.stderr.write("\n═══ Comprehensive Storage Benchmark ═══\n\n"); + const results: CompressionResult[] = []; + + for (const c of CASES) { + results.push(runCase(c)); + } + + // Summary table by corpus type (averages across sizes) + process.stderr.write("\n─── Summary by corpus type (10K) ───\n"); + for (const type of Object.keys(CORPUS_GENERATORS) as SyntheticCorpusType[]) { + const row = results.find((r) => r.corpus === `${type}/10k`); + if (row) { + process.stderr.write( + ` ${type.padEnd(18)} ${row.bytesPerLog.toFixed(2).padStart(8)} B/log ` + + `${row.ratioVsNdjson.toFixed(1)}× vs ndjson\n` + ); + } + } + process.stderr.write("\n"); + + return buildReport("comprehensive-storage", results); +} diff --git a/packages/o11ylogsdb/bench/ingest-throughput.bench.ts b/packages/o11ylogsdb/bench/ingest-throughput.bench.ts new file mode 100644 index 00000000..57a36f03 --- /dev/null +++ b/packages/o11ylogsdb/bench/ingest-throughput.bench.ts @@ -0,0 +1,164 @@ +/** + * ingest-throughput.bench.ts — Sustained ingest throughput across corpus types. + * + * Measures records/second and MB/second for each corpus type at 100K scale. + * Tests both the append path and the flush/encode path separately, then + * combined end-to-end throughput. + * + * Also measures memory efficiency: peak RSS and heap per 10K records. + */ + +import { + defaultRegistry, + GzipCodec, + type InstrumentationScope, + type LogRecord, + LogStore, + type Resource, + TypedColumnarDrainPolicy, + ZstdCodec, +} from "../dist/index.js"; +import { + CORPUS_GENERATORS, + type SyntheticCorpusType, +} from "./synthetic-corpora.js"; +import { nowMillis } from "./harness.js"; + +const SCOPE: InstrumentationScope = { name: "bench-ingest", version: "0.0.0" }; +const RECORD_COUNT = 100_000; + +function buildResource(corpusType: string): Resource { + return { + attributes: [ + { key: "service.name", value: `bench-${corpusType}` }, + ], + }; +} + +interface IngestResult { + corpusType: string; + recordCount: number; + totalRawBytes: number; + totalChunkBytes: number; + ingestMs: number; + recordsPerSecond: number; + rawMBPerSecond: number; + bytesPerLog: number; + chunkCount: number; + peakHeapMB: number; + peakRssMB: number; +} + +function measureIngest(corpusType: SyntheticCorpusType): IngestResult { + const records = CORPUS_GENERATORS[corpusType](RECORD_COUNT); + + let totalRawBytes = 0; + for (const r of records) { + const body = typeof r.body === "string" ? r.body : JSON.stringify(r.body); + totalRawBytes += body.length; + } + + const store = new LogStore({ + registry: defaultRegistry() + .register(new GzipCodec(6)) + .register(new ZstdCodec(3)) + .register(new ZstdCodec(19)), + policy: new TypedColumnarDrainPolicy({ bodyCodec: "zstd-19" }), + rowsPerChunk: 1024, + }); + + const resource = buildResource(corpusType); + + // Force GC before measurement + const g = globalThis as { gc?: () => void }; + if (typeof g.gc === "function") g.gc(); + const memBefore = process.memoryUsage(); + + const t0 = nowMillis(); + for (const record of records) { + store.append(resource, SCOPE, record); + } + store.flush(); + const t1 = nowMillis(); + + const memAfter = process.memoryUsage(); + const ingestMs = t1 - t0; + const stats = store.stats(); + + return { + corpusType, + recordCount: RECORD_COUNT, + totalRawBytes, + totalChunkBytes: stats.totalChunkBytes, + ingestMs, + recordsPerSecond: RECORD_COUNT / (ingestMs / 1000), + rawMBPerSecond: (totalRawBytes / 1_000_000) / (ingestMs / 1000), + bytesPerLog: stats.totalChunkBytes / RECORD_COUNT, + chunkCount: stats.chunks, + peakHeapMB: Math.max(memBefore.heapUsed, memAfter.heapUsed) / 1_000_000, + peakRssMB: Math.max(memBefore.rss, memAfter.rss) / 1_000_000, + }; +} + +const CORPUS_TYPES: SyntheticCorpusType[] = [ + "syslog", + "structured", + "high-cardinality", + "cloud-native", + "mixed", +]; + +export default async function run() { + process.stderr.write("\n═══ Ingest Throughput Benchmark (100K records per corpus) ═══\n\n"); + const results: IngestResult[] = []; + + // Warmup pass + process.stderr.write(" Warmup…\n"); + CORPUS_GENERATORS["syslog"](1000); + + for (const corpusType of CORPUS_TYPES) { + process.stderr.write(` ${corpusType}… `); + const result = measureIngest(corpusType); + results.push(result); + process.stderr.write( + `${(result.recordsPerSecond / 1000).toFixed(0)}K rec/s | ` + + `${result.rawMBPerSecond.toFixed(1)} MB/s raw | ` + + `${result.bytesPerLog.toFixed(2)} B/log | ` + + `heap=${result.peakHeapMB.toFixed(0)}MB\n` + ); + } + + // Summary table + process.stderr.write("\n─── Ingest Throughput Summary ───\n"); + process.stderr.write( + " " + + "corpus".padEnd(18) + + "rec/s".padEnd(12) + + "MB/s".padEnd(10) + + "B/log".padEnd(10) + + "chunks".padEnd(10) + + "heap MB".padEnd(10) + + "\n" + ); + for (const r of results) { + process.stderr.write( + " " + + r.corpusType.padEnd(18) + + `${(r.recordsPerSecond / 1000).toFixed(0)}K`.padEnd(12) + + r.rawMBPerSecond.toFixed(1).padEnd(10) + + r.bytesPerLog.toFixed(2).padEnd(10) + + String(r.chunkCount).padEnd(10) + + r.peakHeapMB.toFixed(0).padEnd(10) + + "\n" + ); + } + process.stderr.write("\n"); + + return { + module: "ingest-throughput", + timestamp: new Date().toISOString(), + commit: process.env.GIT_COMMIT ?? null, + node: process.version, + results, + }; +} diff --git a/packages/o11ylogsdb/bench/run.mjs b/packages/o11ylogsdb/bench/run.mjs index 31784473..2aeab9fe 100644 --- a/packages/o11ylogsdb/bench/run.mjs +++ b/packages/o11ylogsdb/bench/run.mjs @@ -51,6 +51,9 @@ const modules = { "drain-churn": "../dist-bench/drain-churn.bench.js", compaction: "../dist-bench/compaction.bench.js", "pino-query": "../dist-bench/pino-query.bench.js", + "comprehensive-storage": "../dist-bench/comprehensive-storage.bench.js", + "comprehensive-query": "../dist-bench/comprehensive-query.bench.js", + "ingest-throughput": "../dist-bench/ingest-throughput.bench.js", }; const harnessImport = await import("../dist-bench/harness.js"); diff --git a/packages/o11ylogsdb/bench/synthetic-corpora.ts b/packages/o11ylogsdb/bench/synthetic-corpora.ts new file mode 100644 index 00000000..2ed63a09 --- /dev/null +++ b/packages/o11ylogsdb/bench/synthetic-corpora.ts @@ -0,0 +1,365 @@ +/** + * Synthetic corpus generators for comprehensive benchmarking. + * + * Covers the major log workload shapes seen in production: + * - Templated syslog (Linux/HDFS-like) + * - Structured JSON (Pino/Winston-like HTTP service logs) + * - High-cardinality (UUIDs, trace IDs, request IDs in every record) + * - Cloud-native (Kubernetes events, Docker container logs) + * - Mixed (realistic service with all body types) + * + * Each generator produces LogRecord[] suitable for direct ingest into + * LogStore. Sizes: 1K, 10K, 100K, 1M (configurable). + */ + +import type { AnyValue, LogRecord } from "../dist/index.js"; + +// ── Random helpers ─────────────────────────────────────────────────── + +let _seed = 42; +function setSeed(s: number) { + _seed = s; +} +function rand(): number { + let t = (_seed += 0x6d2b79f5); + t = Math.imul(t ^ (t >>> 15), t | 1); + t ^= t + Math.imul(t ^ (t >>> 7), t | 61); + return ((t ^ (t >>> 14)) >>> 0) / 4294967296; +} +function randInt(min: number, max: number): number { + return min + Math.floor(rand() * (max - min + 1)); +} +function pick(arr: T[]): T { + return arr[Math.floor(rand() * arr.length)]!; +} +function uuid(): string { + const h = "0123456789abcdef"; + let s = ""; + for (let i = 0; i < 32; i++) { + if (i === 8 || i === 12 || i === 16 || i === 20) s += "-"; + s += h[Math.floor(rand() * 16)]; + } + return s; +} +function shortHex(n: number): string { + return Math.floor(rand() * (16 ** n)) + .toString(16) + .padStart(n, "0"); +} + +// ── Severity distribution ──────────────────────────────────────────── + +const SEVERITY_WEIGHTS = [ + // [severityNumber, severityText, weight] + [1, "TRACE", 3], + [5, "DEBUG", 12], + [9, "INFO", 60], + [13, "WARN", 15], + [17, "ERROR", 8], + [21, "FATAL", 2], +] as const; + +function randomSeverity(): { severityNumber: number; severityText: string } { + const total = SEVERITY_WEIGHTS.reduce((s, [, , w]) => s + w, 0); + let r = rand() * total; + for (const [num, text, w] of SEVERITY_WEIGHTS) { + r -= w; + if (r <= 0) return { severityNumber: num, severityText: text }; + } + return { severityNumber: 9, severityText: "INFO" }; +} + +// ── Corpus: Templated Syslog ───────────────────────────────────────── + +const SYSLOG_TEMPLATES = [ + "sshd[{pid}]: Accepted publickey for {user} from {ip} port {port} ssh2: RSA SHA256:{hash}", + "sshd[{pid}]: pam_unix(sshd:session): session opened for user {user} by (uid=0)", + "sshd[{pid}]: Disconnected from {ip} port {port}", + "kernel: [{uptime}] eth0: link up at {speed} Mbps, full duplex", + "kernel: [{uptime}] Out of memory: Kill process {pid} ({process}) score {score}", + "systemd[1]: Started {service}.service - {description}.", + "systemd[1]: Stopping {service}.service...", + "systemd[1]: {service}.service: Main process exited, code=exited, status={code}", + "CRON[{pid}]: (root) CMD ({command})", + "sudo: {user} : TTY=pts/{tty} ; PWD={path} ; USER=root ; COMMAND={command}", + "dhclient[{pid}]: DHCPREQUEST on {iface} to {ip} port 67", + "kernel: [{uptime}] audit: backlog limit exceeded", + "sshd[{pid}]: Failed password for invalid user {user} from {ip} port {port} ssh2", + "kernel: [{uptime}] TCP: request_sock_TCP: Possible SYN flooding on port {port}. Sending cookies.", + "rsyslogd: [{uptime}] action '{action}' resumed (module '{module}')", +]; + +const USERS = ["root", "admin", "deploy", "www-data", "postgres", "nginx", "app"]; +const SERVICES = ["nginx", "postgresql", "redis", "docker", "kubelet", "containerd", "etcd"]; +const COMMANDS = ["/usr/sbin/logrotate /etc/logrotate.conf", "run-parts /etc/cron.hourly", "/usr/bin/apt-get update -q"]; + +function fillSyslogTemplate(tmpl: string): string { + return tmpl + .replace(/\{pid\}/g, () => String(randInt(1000, 65535))) + .replace(/\{user\}/g, () => pick(USERS)) + .replace(/\{ip\}/g, () => `${randInt(10, 192)}.${randInt(0, 255)}.${randInt(0, 255)}.${randInt(1, 254)}`) + .replace(/\{port\}/g, () => String(randInt(1024, 65535))) + .replace(/\{hash\}/g, () => shortHex(40)) + .replace(/\{uptime\}/g, () => `${randInt(1, 99999)}.${randInt(100, 999)}`) + .replace(/\{speed\}/g, () => pick(["100", "1000", "10000"])) + .replace(/\{process\}/g, () => pick(["java", "python3", "node", "postgres", "redis-server"])) + .replace(/\{score\}/g, () => String(randInt(100, 999))) + .replace(/\{service\}/g, () => pick(SERVICES)) + .replace(/\{description\}/g, () => pick(["HTTP server", "Database", "Cache", "Message queue"])) + .replace(/\{code\}/g, () => String(pick([0, 1, 2, 137, 143]))) + .replace(/\{command\}/g, () => pick(COMMANDS)) + .replace(/\{tty\}/g, () => String(randInt(0, 9))) + .replace(/\{path\}/g, () => pick(["/root", "/home/deploy", "/var/log"])) + .replace(/\{iface\}/g, () => pick(["eth0", "ens5", "wlan0"])) + .replace(/\{action\}/g, () => pick(["action-1-builtin:omfile", "action-3-builtin:ommysql"])) + .replace(/\{module\}/g, () => pick(["builtin:omfile", "builtin:ommysql"])); +} + +export function generateSyslogCorpus(count: number, seed = 42): LogRecord[] { + setSeed(seed); + const records: LogRecord[] = []; + const baseNs = BigInt(Date.now()) * 1_000_000n; + for (let i = 0; i < count; i++) { + const { severityNumber, severityText } = randomSeverity(); + records.push({ + timeUnixNano: baseNs + BigInt(i) * 1_000_000_000n, + severityNumber, + severityText, + body: fillSyslogTemplate(pick(SYSLOG_TEMPLATES)), + attributes: [{ key: "host", value: `server-${randInt(1, 20)}` }], + }); + } + return records; +} + +// ── Corpus: Structured JSON (HTTP service) ─────────────────────────── + +const HTTP_METHODS = ["GET", "POST", "PUT", "DELETE", "PATCH"]; +const HTTP_PATHS = [ + "/api/v2/users", "/api/v2/orders", "/api/v2/products", + "/api/v2/checkout", "/api/v2/search", "/health", "/metrics", +]; +const HTTP_STATUS = [200, 200, 200, 200, 201, 204, 301, 400, 401, 403, 404, 500, 502, 503]; + +export function generateStructuredCorpus(count: number, seed = 42): LogRecord[] { + setSeed(seed); + const records: LogRecord[] = []; + const baseNs = BigInt(Date.now()) * 1_000_000n; + for (let i = 0; i < count; i++) { + const { severityNumber, severityText } = randomSeverity(); + const method = pick(HTTP_METHODS); + const path = pick(HTTP_PATHS); + const status = pick(HTTP_STATUS); + const duration = randInt(1, 5000); + + const body: { [key: string]: AnyValue } = { + msg: `${method} ${path} ${status} ${duration}ms`, + level: severityText.toLowerCase(), + req: { + method, + url: path, + headers: { "x-request-id": uuid(), "user-agent": "Mozilla/5.0" }, + user_id: `usr-${shortHex(8)}`, + }, + res: { status, duration_ms: duration, bytes: randInt(100, 50000) }, + time: Date.now() + i * 1000, + }; + + if (severityNumber >= 17) { + body.error = { + type: pick(["TimeoutError", "ConnectionError", "ValidationError"]), + message: pick(["deadline exceeded", "connection reset", "invalid input"]), + }; + } + + records.push({ + timeUnixNano: baseNs + BigInt(i) * 1_000_000_000n, + severityNumber, + severityText, + body, + attributes: [ + { key: "service.name", value: pick(["api-gateway", "user-service", "order-service"]) }, + { key: "deployment.environment", value: pick(["production", "staging"]) }, + ], + }); + } + return records; +} + +// ── Corpus: High-Cardinality ───────────────────────────────────────── + +const HC_TEMPLATES = [ + "Processing request {requestId} for user {userId} trace={traceId}", + "Cache lookup key={cacheKey} result={result} latency={latency}ms", + "Database query tx={txId} table={table} rows={rows} duration={duration}ms", + "Message consumed topic={topic} partition={partition} offset={offset} key={msgKey}", + "Span completed spanId={spanId} traceId={traceId} duration={duration}ns", +]; + +export function generateHighCardinalityCorpus(count: number, seed = 42): LogRecord[] { + setSeed(seed); + const records: LogRecord[] = []; + const baseNs = BigInt(Date.now()) * 1_000_000n; + for (let i = 0; i < count; i++) { + const { severityNumber, severityText } = randomSeverity(); + const tmpl = pick(HC_TEMPLATES); + const body = tmpl + .replace("{requestId}", uuid()) + .replace("{userId}", `usr-${shortHex(8)}`) + .replace("{traceId}", shortHex(32)) + .replace("{spanId}", shortHex(16)) + .replace("{cacheKey}", `${pick(["user", "session", "product"])}:${shortHex(8)}`) + .replace("{result}", pick(["HIT", "MISS"])) + .replace("{latency}", String(randInt(1, 500))) + .replace("{txId}", `tx-${shortHex(8)}`) + .replace("{table}", pick(["users", "orders", "products", "sessions"])) + .replace("{rows}", String(randInt(1, 100000))) + .replace("{duration}", String(randInt(1, 10000))) + .replace("{topic}", pick(["orders.created", "payments.processed", "events.raw"])) + .replace("{partition}", String(randInt(0, 15))) + .replace("{offset}", String(randInt(100000, 9999999))) + .replace("{msgKey}", shortHex(16)); + + const traceId = new Uint8Array(16); + const spanId = new Uint8Array(8); + for (let j = 0; j < 16; j++) traceId[j] = Math.floor(rand() * 256); + for (let j = 0; j < 8; j++) spanId[j] = Math.floor(rand() * 256); + + records.push({ + timeUnixNano: baseNs + BigInt(i) * 1_000_000_000n, + severityNumber, + severityText, + body, + attributes: [ + { key: "service.name", value: pick(["gateway", "processor", "indexer"]) }, + { key: "request.id", value: uuid() }, + ], + traceId, + spanId, + }); + } + return records; +} + +// ── Corpus: Cloud-Native (K8s/Docker) ──────────────────────────────── + +const K8S_TEMPLATES = [ + 'I{timestamp} {pid} {file}:{line}] "Trying to schedule pod {namespace}/{pod}"', + 'I{timestamp} {pid} {file}:{line}] "Successfully assigned {namespace}/{pod} to {node}"', + 'W{timestamp} {pid} {file}:{line}] "FailedScheduling: {reason}"', + 'E{timestamp} {pid} {file}:{line}] "Error syncing pod {podId}: {error}"', + "time=\"{iso}\" level={level} msg=\"{msg}\" container={container} namespace={namespace}", + "{iso} stdout F {json_line}", + "level={level} ts={iso} caller={caller} msg=\"{msg}\" component={component}", +]; +const K8S_NAMESPACES = ["default", "kube-system", "monitoring", "production", "staging"]; +const K8S_NODES = ["node-01", "node-02", "node-03", "node-04"]; +const K8S_PODS = ["api-deploy-7b5c4", "worker-deploy-3f2a1", "redis-0", "postgres-0", "nginx-ingress-abc12"]; +const K8S_ERRORS = ["CrashLoopBackOff", "OOMKilled", "ImagePullBackOff", "ErrImagePull", "ContainerCreating"]; + +export function generateCloudNativeCorpus(count: number, seed = 42): LogRecord[] { + setSeed(seed); + const records: LogRecord[] = []; + const baseNs = BigInt(Date.now()) * 1_000_000n; + for (let i = 0; i < count; i++) { + const { severityNumber, severityText } = randomSeverity(); + const tmpl = pick(K8S_TEMPLATES); + const body = tmpl + .replace("{timestamp}", `${randInt(0, 1231)}${randInt(10, 12)}${randInt(10, 28)} ${randInt(10, 23)}:${randInt(10, 59)}:${randInt(10, 59)}.${randInt(100, 999)}`) + .replace("{pid}", String(randInt(1, 9))) + .replace("{file}", pick(["scheduler.go", "kubelet.go", "controller.go", "pod.go"])) + .replace("{line}", String(randInt(100, 999))) + .replace("{namespace}", pick(K8S_NAMESPACES)) + .replace("{pod}", pick(K8S_PODS)) + .replace("{node}", pick(K8S_NODES)) + .replace("{reason}", pick(K8S_ERRORS)) + .replace("{podId}", shortHex(12)) + .replace("{error}", pick(["context deadline exceeded", "connection refused", "no such container"])) + .replace("{iso}", new Date(Date.now() - randInt(0, 86400000)).toISOString()) + .replace("{level}", severityText.toLowerCase()) + .replace("{msg}", pick(["container started", "health check failed", "pulling image", "sync complete"])) + .replace("{container}", `${pick(["api", "sidecar", "init"])}`) + .replace("{caller}", `${pick(["main.go", "server.go", "handler.go"])}:${randInt(10, 500)}`) + .replace("{component}", pick(["kube-scheduler", "kube-controller-manager", "kubelet"])) + .replace("{json_line}", JSON.stringify({ ts: Date.now(), msg: pick(["request handled", "query executed"]) })); + + records.push({ + timeUnixNano: baseNs + BigInt(i) * 1_000_000_000n, + severityNumber, + severityText, + body, + attributes: [ + { key: "k8s.namespace.name", value: pick(K8S_NAMESPACES) }, + { key: "k8s.pod.name", value: pick(K8S_PODS) }, + { key: "k8s.node.name", value: pick(K8S_NODES) }, + ], + }); + } + return records; +} + +// ── Corpus: Mixed (realistic service) ──────────────────────────────── + +export function generateMixedCorpus(count: number, seed = 42): LogRecord[] { + setSeed(seed); + const records: LogRecord[] = []; + const baseNs = BigInt(Date.now()) * 1_000_000n; + + for (let i = 0; i < count; i++) { + const roll = rand(); + let record: LogRecord; + + if (roll < 0.4) { + // 40% syslog-style templated + const [r] = generateSyslogCorpus(1, _seed); + record = r!; + _seed += 7; + } else if (roll < 0.75) { + // 35% structured JSON + const [r] = generateStructuredCorpus(1, _seed); + record = r!; + _seed += 7; + } else if (roll < 0.9) { + // 15% high-cardinality + const [r] = generateHighCardinalityCorpus(1, _seed); + record = r!; + _seed += 7; + } else { + // 10% cloud-native + const [r] = generateCloudNativeCorpus(1, _seed); + record = r!; + _seed += 7; + } + + // Override timestamp to maintain ordering + record!.timeUnixNano = baseNs + BigInt(i) * 1_000_000_000n; + records.push(record!); + } + return records; +} + +// ── Corpus Sizes ───────────────────────────────────────────────────── + +export type CorpusSize = "1k" | "10k" | "100k" | "1m"; + +export const CORPUS_SIZES: Record = { + "1k": 1_000, + "10k": 10_000, + "100k": 100_000, + "1m": 1_000_000, +}; + +export type SyntheticCorpusType = + | "syslog" + | "structured" + | "high-cardinality" + | "cloud-native" + | "mixed"; + +export const CORPUS_GENERATORS: Record LogRecord[]> = { + "syslog": generateSyslogCorpus, + "structured": generateStructuredCorpus, + "high-cardinality": generateHighCardinalityCorpus, + "cloud-native": generateCloudNativeCorpus, + "mixed": generateMixedCorpus, +}; diff --git a/packages/o11ylogsdb/src/query.ts b/packages/o11ylogsdb/src/query.ts index 8ca6d071..65d4411c 100644 --- a/packages/o11ylogsdb/src/query.ts +++ b/packages/o11ylogsdb/src/query.ts @@ -172,6 +172,9 @@ function freshStats(): QueryStats { } function nowMillis(): number { + if (typeof performance !== "undefined" && performance.now) { + return performance.now(); + } return Number(process.hrtime.bigint()) / 1_000_000; } diff --git a/site/logsdb-engine/css/base.css b/site/logsdb-engine/css/base.css new file mode 100644 index 00000000..85b3157b --- /dev/null +++ b/site/logsdb-engine/css/base.css @@ -0,0 +1,17 @@ +/* ── LogsDB Engine — Base Styles ────────────────────────────────────── */ +/* Inherits the o11ykit brand system from ../styles.css */ + +:root { + --panel-bg: var(--paper, #F2EFE7); + --panel-border: var(--ink, #11110F); + --accent: var(--signal, oklch(0.66 0.16 42)); + --mono: var(--mono, "JetBrains Mono", monospace); + --sans: var(--sans, "Inter Tight", sans-serif); + --display: var(--display, "Fraunces", serif); + --severity-trace: #6b7280; + --severity-debug: #3b82f6; + --severity-info: #10b981; + --severity-warn: #f59e0b; + --severity-error: #ef4444; + --severity-fatal: #dc2626; +} diff --git a/site/logsdb-engine/css/demo.css b/site/logsdb-engine/css/demo.css new file mode 100644 index 00000000..9f1315be --- /dev/null +++ b/site/logsdb-engine/css/demo.css @@ -0,0 +1,427 @@ +/* ── LogsDB Engine — Demo Styles ────────────────────────────────────── */ + +/* ─── Hero & Layout ───────────────────────────────────────────────── */ + +.page { max-width: 1440px; margin: 0 auto; padding: 0 1rem; } + +.hero { + text-align: center; + padding: 3rem 1rem; + border-bottom: 1px solid var(--panel-border); +} +.hero h1 { font-family: var(--display); font-size: clamp(1.8rem, 4vw, 3rem); margin: 0 0 1rem; } +.hero .lede { font-family: var(--sans); font-size: 1.1rem; max-width: 640px; margin: 0 auto 1.5rem; opacity: 0.8; } +.eyebrow { + font-family: var(--mono); + font-size: 0.75rem; + text-transform: uppercase; + letter-spacing: 0.1em; + opacity: 0.6; + margin-bottom: 0.5rem; +} + +/* ─── Dataset Selector ────────────────────────────────────────────── */ + +.dataset-section { + padding: 2rem 0; + border-bottom: 1px solid var(--panel-border); +} +.dataset-section h2 { font-family: var(--sans); font-size: 1.3rem; margin: 0 0 1rem; } + +.dataset-buttons { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); + gap: 0.75rem; +} + +.dataset-btn { + display: flex; + flex-direction: column; + padding: 1rem; + border: 1px solid var(--panel-border); + background: var(--panel-bg); + cursor: pointer; + transition: border-color 0.15s, transform 0.1s; + font-family: var(--sans); + text-align: left; +} +.dataset-btn:hover { border-color: var(--accent); transform: translateY(-1px); } +.dataset-btn:disabled { opacity: 0.5; cursor: not-allowed; transform: none; } +.dataset-label { font-weight: 600; font-size: 0.9rem; margin-bottom: 0.3rem; } +.dataset-desc { font-size: 0.75rem; opacity: 0.6; } + +/* ─── Progress ────────────────────────────────────────────────────── */ + +#gen-progress { + margin: 1rem 0; + padding: 1rem; + border: 1px solid var(--panel-border); + background: var(--panel-bg); +} +#gen-progress[hidden] { display: none; } +#gen-status { font-family: var(--mono); font-size: 0.85rem; margin-bottom: 0.5rem; } +.progress-track { + height: 4px; + background: var(--panel-border); + border-radius: 2px; + overflow: hidden; +} +.progress-fill { + height: 100%; + background: var(--accent); + width: 0%; + transition: width 0.2s; +} + +/* ─── Stats Panel ─────────────────────────────────────────────────── */ + +.stats-panel { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(130px, 1fr)); + gap: 0.5rem; + padding: 1.5rem 0; + border-bottom: 1px solid var(--panel-border); +} +.stats-panel[hidden] { display: none; } + +.stat-item { + text-align: center; + padding: 0.75rem 0.5rem; + border: 1px solid var(--panel-border); +} +.stat-value { + font-family: var(--mono); + font-size: 1.2rem; + font-weight: 700; + display: block; +} +.stat-label { + font-family: var(--sans); + font-size: 0.7rem; + text-transform: uppercase; + letter-spacing: 0.05em; + opacity: 0.6; + margin-top: 0.25rem; + display: block; +} + +/* ─── Tab Navigation ──────────────────────────────────────────────── */ + +.tabs-panel { padding: 1rem 0; } +.tabs-panel[hidden] { display: none; } + +.tab-bar { + display: flex; + gap: 0; + border: 1px solid var(--panel-border); + border-bottom: none; + margin-bottom: 0; +} +.tab-btn { + flex: 1; + padding: 0.75rem 1rem; + border: none; + border-right: 1px solid var(--panel-border); + background: transparent; + font-family: var(--mono); + font-size: 0.8rem; + cursor: pointer; + transition: background 0.15s; +} +.tab-btn:last-child { border-right: none; } +.tab-btn:hover { background: rgba(0,0,0,0.03); } +.tab-btn.active { + background: var(--panel-border); + color: var(--panel-bg); + font-weight: 600; +} + +.tab-content { + border: 1px solid var(--panel-border); + padding: 1.5rem; + min-height: 400px; +} +.tab-content[hidden] { display: none; } + +/* ─── Data Tables ─────────────────────────────────────────────────── */ + +.data-table { + width: 100%; + border-collapse: collapse; + font-family: var(--mono); + font-size: 0.8rem; +} +.data-table th, .data-table td { + padding: 0.5rem 0.75rem; + border: 1px solid var(--panel-border); + text-align: left; +} +.data-table th { + background: var(--panel-border); + color: var(--panel-bg); + font-weight: 600; + font-size: 0.7rem; + text-transform: uppercase; + letter-spacing: 0.05em; +} + +/* ─── Chunk Grid ──────────────────────────────────────────────────── */ + +.chunk-grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(220px, 1fr)); + gap: 0.5rem; + margin-top: 1rem; +} + +.chunk-card { + border: 1px solid var(--panel-border); + padding: 0.75rem; + font-family: var(--mono); + font-size: 0.75rem; +} +.chunk-header { + display: flex; + justify-content: space-between; + margin-bottom: 0.5rem; +} +.chunk-service { font-weight: 600; } +.chunk-meta { opacity: 0.5; } +.chunk-stats { + display: flex; + flex-wrap: wrap; + gap: 0.5rem; + font-size: 0.7rem; + opacity: 0.7; +} +.chunk-bar { + margin-top: 0.5rem; + height: 3px; + background: rgba(0,0,0,0.1); + border-radius: 1.5px; + overflow: hidden; +} +.chunk-bar-fill { + height: 100%; + width: calc(var(--ratio) * 100%); + background: var(--accent); +} + +/* ─── Service Cards ───────────────────────────────────────────────── */ + +#service-health { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); + gap: 0.75rem; + margin-bottom: 1.5rem; +} + +.service-card { + border: 1px solid var(--panel-border); + padding: 1rem; +} +.service-card.service-unhealthy { + border-color: var(--severity-error); + border-width: 2px; +} +.service-name { + font-family: var(--mono); + font-weight: 700; + font-size: 0.9rem; + margin-bottom: 0.5rem; +} +.service-stats { + display: flex; + flex-wrap: wrap; + gap: 0.75rem; + font-family: var(--mono); + font-size: 0.75rem; + opacity: 0.7; +} +.service-errors { color: var(--severity-error); opacity: 1; } + +/* ─── Error / Template Lists ──────────────────────────────────────── */ + +.error-list, .template-list { margin-top: 0.75rem; } + +.error-item, .template-item { + border: 1px solid var(--panel-border); + padding: 0.75rem; + margin-bottom: 0.5rem; +} +.error-body code, .template-pattern { + font-family: var(--mono); + font-size: 0.75rem; + word-break: break-all; +} +.error-meta { + display: flex; + gap: 1rem; + margin-top: 0.4rem; + font-size: 0.7rem; + opacity: 0.7; +} +.error-count { color: var(--severity-error); font-weight: 600; } +.template-item { + display: flex; + justify-content: space-between; + align-items: center; +} +.template-count { + font-family: var(--mono); + font-size: 0.75rem; + font-weight: 600; + opacity: 0.6; +} + +/* ─── Query Builder ───────────────────────────────────────────────── */ + +.query-row { + display: flex; + align-items: center; + gap: 0.75rem; + margin-bottom: 0.75rem; + font-family: var(--mono); + font-size: 0.8rem; +} +.query-row label { display: flex; align-items: center; gap: 0.4rem; min-width: 140px; } +.query-row input[type="text"], +.query-row input[type="number"], +.query-row select { + padding: 0.4rem 0.6rem; + border: 1px solid var(--panel-border); + background: var(--panel-bg); + font-family: var(--mono); + font-size: 0.8rem; + flex: 1; + max-width: 250px; +} +.query-row input:disabled, .query-row select:disabled { opacity: 0.4; } + +.query-actions { margin-top: 1rem; } + +/* ─── Query Results ───────────────────────────────────────────────── */ + +.query-stats-bar { + display: flex; + flex-wrap: wrap; + gap: 1rem; + padding: 0.75rem 1rem; + border: 1px solid var(--panel-border); + background: rgba(0,0,0,0.02); + font-family: var(--mono); + font-size: 0.8rem; + margin-bottom: 1rem; +} +.qs-item strong { color: var(--accent); } + +.query-distributions { + margin-bottom: 1rem; +} +.dist-severity { + display: flex; + flex-wrap: wrap; + gap: 0.4rem; +} +.sev-badge { + font-family: var(--mono); + font-size: 0.7rem; + padding: 0.2rem 0.5rem; + border: 1px solid var(--sev-color); + color: var(--sev-color); + border-radius: 2px; +} + +/* ─── Log Table ───────────────────────────────────────────────────── */ + +.log-table-wrap { overflow-x: auto; } + +.log-table { + width: 100%; + border-collapse: collapse; + font-family: var(--mono); + font-size: 0.75rem; +} +.log-table th, .log-table td { + padding: 0.35rem 0.5rem; + border-bottom: 1px solid rgba(0,0,0,0.08); + text-align: left; + vertical-align: top; +} +.log-table th { + background: var(--panel-border); + color: var(--panel-bg); + font-size: 0.7rem; + text-transform: uppercase; + letter-spacing: 0.03em; + position: sticky; + top: 0; +} +.log-time { white-space: nowrap; opacity: 0.7; } +.log-svc code { font-size: 0.7rem; } +.log-body code { font-size: 0.7rem; word-break: break-word; } + +.sev-pill { + display: inline-block; + padding: 0.1rem 0.3rem; + border-radius: 2px; + color: #fff; + font-size: 0.65rem; + font-weight: 600; + white-space: nowrap; +} + +.log-row.sev-error .log-body code { color: var(--severity-error); } +.log-row.sev-fatal .log-body code { color: var(--severity-fatal); } + +/* ─── Utility ─────────────────────────────────────────────────────── */ + +.muted { font-family: var(--sans); font-size: 0.8rem; opacity: 0.5; margin: 1rem 0; } +h4 { font-family: var(--sans); font-size: 1rem; margin: 0 0 0.5rem; } +[hidden] { display: none !important; } + +/* ─── CTA Buttons ─────────────────────────────────────────────────── */ + +.cta { + display: inline-block; + padding: 0.6rem 1.2rem; + border: 1px solid var(--panel-border); + background: transparent; + font-family: var(--mono); + font-size: 0.8rem; + text-decoration: none; + color: inherit; + cursor: pointer; + transition: background 0.15s, color 0.15s; +} +.cta:hover { background: var(--panel-border); color: var(--panel-bg); } +.cta-primary { + background: var(--panel-border); + color: var(--panel-bg); +} +.cta-primary:hover { background: var(--accent); border-color: var(--accent); } + +/* ─── Top Bar ─────────────────────────────────────────────────────── */ + +.topbar { + display: flex; + align-items: center; + padding: 0.75rem 0; + border-bottom: 1px solid var(--panel-border); + gap: 1.5rem; +} +.brand { + font-family: var(--mono); + font-weight: 700; + font-size: 0.9rem; + text-decoration: none; + color: inherit; + display: flex; + align-items: center; + gap: 0.4rem; +} +.brand-mark { width: 20px; height: 20px; } +.topnav { display: flex; gap: 1rem; font-family: var(--mono); font-size: 0.8rem; } +.topnav a { text-decoration: none; color: inherit; opacity: 0.7; } +.topnav a:hover { opacity: 1; } +.topnav a[aria-current="page"] { opacity: 1; font-weight: 600; border-bottom: 2px solid var(--accent); } diff --git a/site/logsdb-engine/index.html b/site/logsdb-engine/index.html new file mode 100644 index 00000000..13cc2548 --- /dev/null +++ b/site/logsdb-engine/index.html @@ -0,0 +1,145 @@ + + + + + + o11ylogsdb — Browser-Native Log Database Engine + + + + + + + + + + + +
+ + +
+ o11ykit + +
+ + +
+

o11ylogsdb engine

+

A log database
that runs in your browser.

+

+ Generate realistic observability log datasets, inspect byte-level storage + efficiency, and query them in real-time. Drain template extraction + typed + columnar codecs achieve 20–60× compression over raw OTLP/JSON. +

+ +
+ + +
+

Generate Dataset

+

Choose a dataset size. Records are generated deterministically from a realistic microservices workload (6 services, templated + structured + free-text bodies).

+
+ +
+ + + + + + + + + + +
+ + + diff --git a/site/logsdb-engine/js/app.js b/site/logsdb-engine/js/app.js new file mode 100644 index 00000000..d1404b42 --- /dev/null +++ b/site/logsdb-engine/js/app.js @@ -0,0 +1,508 @@ +// @ts-nocheck +// ── LogsDB Engine — App Entry Point ─────────────────────────────────── +// Three-panel interactive experience: +// 1. Storage Explorer — byte-level chunk inspection +// 2. Logs Explorer — curated service health + insights +// 3. Query Builder — full query API exposed in UI + +import { DATASET_PRESETS, generateLogs } from "./data-gen.js"; +import { analyzeStore } from "./logs-model.js"; +import { + buildQuerySpec, + computeSeverityDistribution, + computeServiceDistribution, + createQueryState, + executeQuery, + formatBody, + formatBodyPreview, + formatTimestamp, + severityColor, + severityLabel, +} from "./query-model.js"; +import { + createStore, + getChunkDetails, + getServiceBreakdown, + getStoreStats, + ingestRecords, +} from "./storage-model.js"; + +// ── State ───────────────────────────────────────────────────────────── + +let store = null; +let genStats = null; +let queryState = createQueryState(); +let lastQueryResult = null; +let currentTab = "storage"; + +// ── DOM Helpers ─────────────────────────────────────────────────────── + +function $(id) { + return document.getElementById(id); +} + +function setText(id, text) { + const el = $(id); + if (el) el.textContent = text; +} + +function setHtml(id, html) { + const el = $(id); + if (el) el.innerHTML = html; +} + +function show(id) { + const el = $(id); + if (el) el.hidden = false; +} + +function hide(id) { + const el = $(id); + if (el) el.hidden = true; +} + +function escapeHtml(str) { + return str.replace(/&/g, "&").replace(//g, ">"); +} + +function formatBytes(n) { + if (n >= 1024 * 1024) return `${(n / (1024 * 1024)).toFixed(1)} MB`; + if (n >= 1024) return `${(n / 1024).toFixed(1)} KB`; + return `${n} B`; +} + +function formatNum(n) { + return n.toLocaleString(); +} + +// ── Dataset Generation ─────────────────────────────────────────────── + +function initDatasetButtons() { + const container = $("dataset-buttons"); + if (!container) return; + + container.innerHTML = Object.entries(DATASET_PRESETS) + .map( + ([key, preset]) => ` + + ` + ) + .join(""); + + container.addEventListener("click", (e) => { + const btn = e.target.closest(".dataset-btn"); + if (!btn) return; + const preset = btn.dataset.preset; + generateDataset(preset); + }); +} + +async function generateDataset(presetKey) { + const preset = DATASET_PRESETS[presetKey]; + if (!preset) return; + + // Disable buttons during generation + const buttons = document.querySelectorAll(".dataset-btn"); + buttons.forEach((b) => (b.disabled = true)); + show("gen-progress"); + setText("gen-status", "Generating log records..."); + + // Use requestAnimationFrame to allow UI updates + await new Promise((r) => requestAnimationFrame(r)); + + const t0 = performance.now(); + + // Generate in a setTimeout to not block UI + await new Promise((resolve) => { + setTimeout(() => { + const result = generateLogs({ + count: preset.count, + durationMinutes: preset.durationMinutes, + onProgress: (p) => { + const pct = Math.round(p * 100); + setText("gen-status", `Generating... ${pct}%`); + const bar = $("gen-bar"); + if (bar) bar.style.width = `${pct}%`; + }, + }); + + genStats = result.stats; + const genTime = performance.now() - t0; + + setText("gen-status", `Ingesting ${formatNum(preset.count)} records into LogStore...`); + + // Create store and ingest + store = createStore(); + const ingestResult = ingestRecords(store, result.records); + + const totalTime = performance.now() - t0; + + // Update stats + const storeStats = getStoreStats(store); + setText("stat-logs", formatNum(storeStats.totalLogs)); + setText("stat-bytes-per-log", `${storeStats.bytesPerLogFormatted} B/log`); + setText("stat-compression", `${storeStats.compressionRatio.toFixed(0)}×`); + setText("stat-streams", formatNum(storeStats.streams)); + setText("stat-chunks", formatNum(storeStats.chunks)); + setText("stat-total-bytes", formatBytes(storeStats.totalChunkBytes)); + setText("stat-ingest-rate", `${formatNum(ingestResult.logsPerSecond)} logs/s`); + setText("stat-gen-time", `${totalTime.toFixed(0)}ms`); + + show("stats-panel"); + show("tabs-panel"); + hide("gen-progress"); + + // Render initial tab + renderCurrentTab(); + + buttons.forEach((b) => (b.disabled = false)); + resolve(); + }, 10); + }); +} + +// ── Tab Navigation ─────────────────────────────────────────────────── + +function initTabs() { + const tabBtns = document.querySelectorAll(".tab-btn"); + tabBtns.forEach((btn) => { + btn.addEventListener("click", () => { + currentTab = btn.dataset.tab; + tabBtns.forEach((b) => b.classList.toggle("active", b === btn)); + renderCurrentTab(); + }); + }); +} + +function renderCurrentTab() { + hide("panel-storage"); + hide("panel-logs"); + hide("panel-query"); + show(`panel-${currentTab}`); + + switch (currentTab) { + case "storage": + renderStorageExplorer(); + break; + case "logs": + renderLogsExplorer(); + break; + case "query": + renderQueryBuilder(); + break; + } +} + +// ── Storage Explorer ───────────────────────────────────────────────── + +function renderStorageExplorer() { + if (!store) return; + + const chunks = getChunkDetails(store); + const services = getServiceBreakdown(store); + + // Service breakdown table + setHtml( + "service-breakdown", + ` + + + + + ${services + .map( + (s) => ` + + + + + + + ` + ) + .join("")} + +
ServiceLogsChunksBytesB/logRatio
${escapeHtml(s.name)}${formatNum(s.logs)}${s.chunks}${formatBytes(s.bytes)}${s.bytesPerLog}${s.compressionRatio}×
` + ); + + // Chunk list + const maxChunksShown = 50; + const shownChunks = chunks.slice(0, maxChunksShown); + setHtml( + "chunk-list", + `
+ ${shownChunks + .map( + (c, i) => ` +
+
+ ${escapeHtml(c.service)} + #${c.chunkIndex} +
+
+ ${formatNum(c.nLogs)} logs + ${formatBytes(c.totalBytes)} + ${c.bytesPerLog} B/log + ${c.compressionRatio}× ratio +
+
+
+
+
` + ) + .join("")} +
+ ${chunks.length > maxChunksShown ? `

Showing ${maxChunksShown} of ${chunks.length} chunks

` : ""}` + ); +} + +// ── Logs Explorer ──────────────────────────────────────────────────── + +function renderLogsExplorer() { + if (!store) return; + + setText("logs-loading", "Analyzing..."); + show("logs-loading"); + + // Defer to allow UI update + requestAnimationFrame(() => { + const analysis = analyzeStore(store); + hide("logs-loading"); + + // Service health cards + setHtml( + "service-health", + analysis.services + .map( + (s) => ` +
+
${escapeHtml(s.name)}
+
+ ${formatNum(s.logs)} logs + ${s.errors} errors (${s.errorRate}%) + ${formatBytes(s.bytes)} +
+
` + ) + .join("") + ); + + // Error clusters + if (analysis.errors.length > 0) { + setHtml( + "error-clusters", + `

Error Clusters (${analysis.errors.length} patterns)

+
+ ${analysis.errors + .slice(0, 10) + .map( + (e) => ` +
+
${escapeHtml(e.body.slice(0, 100))}
+
+ ${e.count}× occurrences + ${e.services.join(", ")} +
+
` + ) + .join("")} +
` + ); + } else { + setHtml("error-clusters", "

No errors found.

"); + } + + // Template analysis + if (analysis.templates.length > 0) { + setHtml( + "template-analysis", + `

Top Log Templates

+
+ ${analysis.templates + .slice(0, 10) + .map( + (t) => ` +
+ ${escapeHtml(t.pattern.slice(0, 100))} + ${t.count}× +
` + ) + .join("")} +
` + ); + } + }); +} + +// ── Query Builder ──────────────────────────────────────────────────── + +function renderQueryBuilder() { + if (!store) return; + renderQueryForm(); + if (lastQueryResult) renderQueryResults(lastQueryResult); +} + +function renderQueryForm() { + const form = $("query-form"); + if (!form) return; + + form.innerHTML = ` +
+ + +
+ +
+ + +
+ +
+ + +
+ +
+ + + +
+ +
+ + +
+ +
+ +
+ `; + + // Wire up event handlers + $("run-query-btn").addEventListener("click", handleRunQuery); + + // Checkbox toggles + for (const [prefix, field] of [ + ["qf-severity", "severity"], + ["qf-body", "bodyContains"], + ["qf-resource", "resourceEquals"], + ["qf-leaf", "bodyLeafEquals"], + ["qf-limit", "limit"], + ]) { + const cb = $(`${prefix}-en`); + if (cb) + cb.addEventListener("change", () => { + queryState[field].enabled = cb.checked; + renderQueryForm(); + }); + } +} + +function handleRunQuery() { + // Read form state + queryState.severity.enabled = $("qf-severity-en")?.checked ?? false; + queryState.severity.min = $("qf-severity-val")?.value ?? "WARN"; + queryState.bodyContains.enabled = $("qf-body-en")?.checked ?? false; + queryState.bodyContains.value = $("qf-body-val")?.value ?? ""; + queryState.resourceEquals.enabled = $("qf-resource-en")?.checked ?? false; + queryState.resourceEquals.value = $("qf-resource-val")?.value ?? ""; + queryState.bodyLeafEquals.enabled = $("qf-leaf-en")?.checked ?? false; + queryState.bodyLeafEquals.path = $("qf-leaf-path")?.value ?? ""; + queryState.bodyLeafEquals.value = $("qf-leaf-val")?.value ?? ""; + queryState.limit.enabled = $("qf-limit-en")?.checked ?? false; + queryState.limit.value = Number($("qf-limit-val")?.value ?? 100); + + const result = executeQuery(store, queryState); + lastQueryResult = result; + renderQueryResults(result); +} + +function renderQueryResults(result) { + const container = $("query-results"); + if (!container) return; + + const { records, stats } = result; + + // Stats bar + const sevDist = computeSeverityDistribution(records); + const svcDist = computeServiceDistribution(records); + + container.innerHTML = ` +
+ ${formatNum(stats.recordsEmitted)} results + ${stats.totalTimeMs}ms + ${formatNum(stats.chunksScanned)} chunks scanned + ${formatNum(stats.chunksPruned)} chunks pruned + ${stats.decodeMillis.toFixed(1)}ms decode +
+ +
+
+ ${Object.entries(sevDist) + .filter(([, v]) => v > 0) + .map( + ([k, v]) => + `${k}: ${v}` + ) + .join("")} +
+
+ +
+ + + + + + ${records + .slice(0, 200) + .map( + (r) => ` + + + + + + ` + ) + .join("")} + +
TimeSevServiceBody
${formatTimestamp(r.timeUnixNano).slice(11, 23)}${severityLabel(r.severityNumber)}${escapeHtml(r.attributes?.find((a) => a.key === "service.name")?.value ?? "")}${escapeHtml(formatBodyPreview(r.body, 100))}
+ ${records.length > 200 ? `

Showing 200 of ${records.length} results

` : ""} +
+ `; +} + +// ── Init ────────────────────────────────────────────────────────────── + +document.addEventListener("DOMContentLoaded", () => { + initDatasetButtons(); + initTabs(); +}); diff --git a/site/logsdb-engine/js/data-gen.js b/site/logsdb-engine/js/data-gen.js new file mode 100644 index 00000000..b3465d5a --- /dev/null +++ b/site/logsdb-engine/js/data-gen.js @@ -0,0 +1,511 @@ +// @ts-nocheck +// ── Log Data Generator ──────────────────────────────────────────────── +// Generates realistic OTLP log data at scale (100K–2M records). +// Produces a mix of templated text logs, structured JSON (KVList) logs, +// and rare free-text logs matching the body shape distribution: +// ~61% templated, ~39% KVList, <1% free-text. + +// ── Service Definitions ────────────────────────────────────────────── + +const SERVICES = { + "api-gateway": { + templates: [ + "Received {method} request to {path} from {ip}", + "Request completed in {duration}ms with status {status}", + "Rate limit exceeded for client {clientId} on {path}", + "Authentication header missing for {path}", + "Circuit breaker tripped for upstream {service}", + "Retrying request to {service} (attempt {attempt}/3)", + "TLS handshake completed in {duration}ms for {domain}", + "Connection pool at {percent}% capacity ({active}/{max})", + ], + structured: true, + severityWeights: { TRACE: 5, DEBUG: 15, INFO: 60, WARN: 12, ERROR: 7, FATAL: 1 }, + logRate: 2000, + }, + "user-service": { + templates: [ + "User {userId} authenticated via {method}", + "Password reset requested for {email}", + "Session {sessionId} expired after {ttl}s", + "Profile update for user {userId}: {field} changed", + "Failed login attempt for {email} from {ip} (attempt {count})", + "Token refresh issued for session {sessionId}", + "Account locked for {email} after {count} failed attempts", + "RBAC check: user {userId} {result} for {permission}", + ], + structured: true, + severityWeights: { TRACE: 3, DEBUG: 10, INFO: 55, WARN: 20, ERROR: 10, FATAL: 2 }, + logRate: 800, + }, + "order-processor": { + templates: [ + "Order {orderId} created by user {userId} total={amount}", + "Payment processing for order {orderId} via {provider}", + "Payment {result} for order {orderId} ({reason})", + "Inventory reserved: {quantity}x {sku} for order {orderId}", + "Inventory insufficient for {sku}: requested={requested} available={available}", + "Order {orderId} status transition: {from} -> {to}", + "Shipping label generated for order {orderId} carrier={carrier}", + "Refund initiated for order {orderId} amount={amount}", + ], + structured: true, + severityWeights: { TRACE: 2, DEBUG: 8, INFO: 50, WARN: 25, ERROR: 12, FATAL: 3 }, + logRate: 500, + }, + "database": { + templates: [ + "Query executed in {duration}ms: {query}", + "Connection pool: active={active} idle={idle} waiting={waiting}", + "Slow query detected ({duration}ms): {query}", + "Transaction {txId} committed ({tables} tables, {rows} rows)", + "Transaction {txId} rolled back: {reason}", + "Index scan on {table}.{index}: {rows} rows examined", + "Deadlock detected between transactions {txId1} and {txId2}", + "Replication lag: {lag}ms on replica {replica}", + ], + structured: false, + severityWeights: { TRACE: 10, DEBUG: 20, INFO: 40, WARN: 20, ERROR: 8, FATAL: 2 }, + logRate: 1500, + }, + "cache-layer": { + templates: [ + "Cache {result} for key {key} (ttl={ttl}s)", + "Cache eviction: {count} keys removed, freed {bytes} bytes", + "Cache miss ratio: {ratio}% over last {window}s", + "Memory pressure: {used}/{max} MB, evicting LRU entries", + "Warm-up completed: {count} keys loaded in {duration}ms", + "Replication sync with {peer}: {delta} keys transferred", + ], + structured: false, + severityWeights: { TRACE: 8, DEBUG: 25, INFO: 50, WARN: 12, ERROR: 4, FATAL: 1 }, + logRate: 3000, + }, + "message-queue": { + templates: [ + "Message published to {topic} partition={partition} offset={offset}", + "Consumer group {group} committed offset {offset} for {topic}", + "Consumer lag: {lag} messages behind on {topic}/{partition}", + "Dead letter: message {msgId} moved after {retries} retries", + "Partition rebalance: assigned [{partitions}] to consumer {consumerId}", + "Batch produced: {count} messages ({bytes} bytes) to {topic}", + ], + structured: true, + severityWeights: { TRACE: 5, DEBUG: 15, INFO: 60, WARN: 15, ERROR: 4, FATAL: 1 }, + logRate: 4000, + }, +}; + +const SERVICE_NAMES = Object.keys(SERVICES); + +const SEVERITY_LEVELS = { TRACE: 1, DEBUG: 5, INFO: 9, WARN: 13, ERROR: 17, FATAL: 21 }; + +const HTTP_METHODS = ["GET", "POST", "PUT", "DELETE", "PATCH"]; +const HTTP_PATHS = [ + "/api/v2/users", + "/api/v2/orders", + "/api/v2/products", + "/api/v2/cart", + "/api/v2/checkout", + "/api/v2/inventory", + "/api/v2/auth/token", + "/api/v2/search", + "/health", + "/metrics", +]; +const HTTP_STATUS = [200, 200, 200, 200, 201, 204, 301, 400, 401, 403, 404, 500, 502, 503]; +const IPS = [ + "10.0.1.42", + "10.0.2.17", + "10.0.3.99", + "192.168.1.100", + "172.16.0.5", + "10.0.1.200", +]; +const PROVIDERS = ["stripe", "paypal", "square", "braintree"]; +const CARRIERS = ["fedex", "ups", "usps", "dhl"]; +const DB_TABLES = ["users", "orders", "products", "inventory", "sessions", "payments"]; +const TOPICS = ["orders.created", "payments.processed", "inventory.updated", "notifications.send"]; + +// ── Dataset Presets ────────────────────────────────────────────────── + +export const DATASET_PRESETS = { + small: { + label: "Small (10K logs, ~170 KB)", + count: 10_000, + durationMinutes: 5, + description: "Quick demo. Loads instantly.", + }, + medium: { + label: "Medium (100K logs, ~1.7 MB)", + count: 100_000, + durationMinutes: 30, + description: "Realistic workload. Fast queries.", + }, + large: { + label: "Large (500K logs, ~8.5 MB)", + count: 500_000, + durationMinutes: 120, + description: "Stress test. Sub-second queries with pruning.", + }, + massive: { + label: "Massive (2M logs, ~34 MB)", + count: 2_000_000, + durationMinutes: 480, + description: "Production scale. Tests memory efficiency.", + }, +}; + +// ── Random Helpers ─────────────────────────────────────────────────── + +let _seed = 42; +function mulberry32() { + let t = (_seed += 0x6d2b79f5); + t = Math.imul(t ^ (t >>> 15), t | 1); + t ^= t + Math.imul(t ^ (t >>> 7), t | 61); + return ((t ^ (t >>> 14)) >>> 0) / 4294967296; +} + +function randInt(min, max) { + return min + Math.floor(mulberry32() * (max - min + 1)); +} + +function pick(arr) { + return arr[Math.floor(mulberry32() * arr.length)]; +} + +function weightedPick(weights) { + const entries = Object.entries(weights); + const total = entries.reduce((s, [, w]) => s + w, 0); + let r = mulberry32() * total; + for (const [key, weight] of entries) { + r -= weight; + if (r <= 0) return key; + } + return entries[entries.length - 1][0]; +} + +function uuid() { + const hex = "0123456789abcdef"; + let s = ""; + for (let i = 0; i < 32; i++) { + if (i === 8 || i === 12 || i === 16 || i === 20) s += "-"; + s += hex[Math.floor(mulberry32() * 16)]; + } + return s; +} + +function shortId() { + return Math.floor(mulberry32() * 0xffffffff) + .toString(16) + .padStart(8, "0"); +} + +function ip() { + return pick(IPS); +} + +// ── Template Variable Generators ───────────────────────────────────── + +function generateTemplateVars(template, service) { + const vars = {}; + if (template.includes("{method}")) vars.method = pick(HTTP_METHODS); + if (template.includes("{path}")) vars.path = pick(HTTP_PATHS); + if (template.includes("{ip}")) vars.ip = ip(); + if (template.includes("{duration}")) vars.duration = randInt(1, 5000); + if (template.includes("{status}")) vars.status = pick(HTTP_STATUS); + if (template.includes("{clientId}")) vars.clientId = `client-${shortId()}`; + if (template.includes("{service}")) vars.service = pick(SERVICE_NAMES); + if (template.includes("{attempt}")) vars.attempt = randInt(1, 3); + if (template.includes("{domain}")) vars.domain = `${pick(SERVICE_NAMES)}.internal`; + if (template.includes("{percent}")) vars.percent = randInt(40, 98); + if (template.includes("{active}")) vars.active = randInt(10, 200); + if (template.includes("{max}")) vars.max = 256; + if (template.includes("{userId}")) vars.userId = `usr-${shortId()}`; + if (template.includes("{email}")) vars.email = `user${randInt(1, 9999)}@example.com`; + if (template.includes("{sessionId}")) vars.sessionId = uuid(); + if (template.includes("{ttl}")) vars.ttl = pick([300, 900, 1800, 3600, 86400]); + if (template.includes("{field}")) vars.field = pick(["name", "email", "avatar", "preferences"]); + if (template.includes("{count}")) vars.count = randInt(1, 10); + if (template.includes("{result}")) + vars.result = pick(["succeeded", "failed", "denied", "granted", "HIT", "MISS"]); + if (template.includes("{permission}")) + vars.permission = pick(["read:users", "write:orders", "admin:system"]); + if (template.includes("{orderId}")) vars.orderId = `ord-${shortId()}`; + if (template.includes("{amount}")) vars.amount = `$${(randInt(500, 50000) / 100).toFixed(2)}`; + if (template.includes("{provider}")) vars.provider = pick(PROVIDERS); + if (template.includes("{reason}")) + vars.reason = pick(["insufficient_funds", "card_declined", "timeout", "success", "duplicate"]); + if (template.includes("{quantity}")) vars.quantity = randInt(1, 20); + if (template.includes("{sku}")) vars.sku = `SKU-${randInt(1000, 9999)}`; + if (template.includes("{requested}")) vars.requested = randInt(5, 50); + if (template.includes("{available}")) vars.available = randInt(0, 4); + if (template.includes("{from}")) + vars.from = pick(["pending", "processing", "shipped", "delivered"]); + if (template.includes("{to}")) + vars.to = pick(["processing", "shipped", "delivered", "cancelled"]); + if (template.includes("{carrier}")) vars.carrier = pick(CARRIERS); + if (template.includes("{query}")) + vars.query = `SELECT * FROM ${pick(DB_TABLES)} WHERE id = '${shortId()}'`; + if (template.includes("{idle}")) vars.idle = randInt(5, 50); + if (template.includes("{waiting}")) vars.waiting = randInt(0, 10); + if (template.includes("{txId}")) vars.txId = `tx-${shortId()}`; + if (template.includes("{txId1}")) vars.txId1 = `tx-${shortId()}`; + if (template.includes("{txId2}")) vars.txId2 = `tx-${shortId()}`; + if (template.includes("{tables}")) vars.tables = randInt(1, 5); + if (template.includes("{rows}")) vars.rows = randInt(1, 100000); + if (template.includes("{table}")) vars.table = pick(DB_TABLES); + if (template.includes("{index}")) vars.index = `idx_${pick(["id", "created_at", "user_id"])}`; + if (template.includes("{lag}")) vars.lag = randInt(10, 5000); + if (template.includes("{replica}")) vars.replica = `replica-${randInt(1, 3)}`; + if (template.includes("{key}")) vars.key = `${pick(DB_TABLES)}:${shortId()}`; + if (template.includes("{bytes}")) vars.bytes = randInt(1024, 1048576); + if (template.includes("{ratio}")) vars.ratio = (mulberry32() * 30 + 5).toFixed(1); + if (template.includes("{window}")) vars.window = pick([60, 300, 900]); + if (template.includes("{used}")) vars.used = randInt(512, 3800); + if (template.includes("{peer}")) vars.peer = `cache-${randInt(1, 5)}`; + if (template.includes("{delta}")) vars.delta = randInt(100, 50000); + if (template.includes("{topic}")) vars.topic = pick(TOPICS); + if (template.includes("{partition}")) vars.partition = randInt(0, 15); + if (template.includes("{offset}")) vars.offset = randInt(100000, 9999999); + if (template.includes("{group}")) vars.group = `cg-${pick(["orders", "notifications", "analytics"])}`; + if (template.includes("{msgId}")) vars.msgId = uuid(); + if (template.includes("{retries}")) vars.retries = randInt(3, 10); + if (template.includes("{partitions}")) vars.partitions = `${randInt(0, 3)},${randInt(4, 7)}`; + if (template.includes("{consumerId}")) vars.consumerId = `consumer-${shortId()}`; + return vars; +} + +function fillTemplate(template, vars) { + return template.replace(/\{(\w+)\}/g, (_, key) => String(vars[key] ?? "")); +} + +// ── Structured (KVList) Body Generator ─────────────────────────────── + +function generateStructuredBody(service, severity) { + const method = pick(HTTP_METHODS); + const path = pick(HTTP_PATHS); + const status = pick(HTTP_STATUS); + const duration = randInt(1, 5000); + + const body = { + msg: + severity === "ERROR" + ? pick(["Connection refused", "Timeout exceeded", "Internal server error", "OOM killed"]) + : `${method} ${path} ${status} ${duration}ms`, + req: { method, url: path, user_id: `usr-${shortId()}`, request_id: uuid() }, + res: { status, duration_ms: duration, bytes: randInt(100, 50000) }, + }; + + if (severity === "ERROR" || severity === "FATAL") { + body.error = { + type: pick(["TimeoutError", "ConnectionError", "ValidationError", "AuthError"]), + message: pick([ + "deadline exceeded", + "connection reset by peer", + "invalid input", + "token expired", + ]), + stack: `at ${pick(SERVICE_NAMES)}.handler (${pick(SERVICE_NAMES)}.ts:${randInt(10, 500)}:${randInt(1, 40)})`, + }; + } + + return body; +} + +// ── Trace Correlation ──────────────────────────────────────────────── + +function generateTraceContext() { + if (mulberry32() < 0.7) { + const traceId = new Uint8Array(16); + const spanId = new Uint8Array(8); + for (let i = 0; i < 16; i++) traceId[i] = Math.floor(mulberry32() * 256); + for (let i = 0; i < 8; i++) spanId[i] = Math.floor(mulberry32() * 256); + return { traceId, spanId }; + } + return {}; +} + +// ── Main Generator ─────────────────────────────────────────────────── + +/** + * Generate a batch of realistic log records. + * @param {object} opts + * @param {number} opts.count - Number of logs to generate + * @param {number} opts.durationMinutes - Time span of the dataset + * @param {number} [opts.seed] - Random seed (default 42) + * @param {(progress: number) => void} [opts.onProgress] - Progress callback (0-1) + * @returns {{ records: Array, stats: object }} + */ +export function generateLogs(opts) { + const { count, durationMinutes, seed = 42, onProgress } = opts; + _seed = seed; + + const baseTime = Date.now() - durationMinutes * 60 * 1000; + const nsPerMs = 1_000_000n; + const durationNs = BigInt(durationMinutes) * 60n * 1000n * nsPerMs; + + const records = []; + const stats = { + byService: {}, + bySeverity: {}, + bodyTemplated: 0, + bodyKvlist: 0, + bodyFreetext: 0, + totalRecords: 0, + }; + + const BATCH_SIZE = 5000; + let reported = 0; + + for (let i = 0; i < count; i++) { + const serviceName = pick(SERVICE_NAMES); + const serviceDef = SERVICES[serviceName]; + const severity = weightedPick(serviceDef.severityWeights); + const severityNumber = SEVERITY_LEVELS[severity]; + + // Time: uniform distribution across the window with some clustering + const progressFraction = i / count; + const jitter = (mulberry32() - 0.5) * 0.01; + const timeFrac = Math.max(0, Math.min(1, progressFraction + jitter)); + const timeUnixNano = BigInt(baseTime) * nsPerMs + BigInt(Math.floor(Number(durationNs) * timeFrac)); + + // Decide body shape: 61% templated, 39% structured, <1% freetext + const bodyRoll = mulberry32(); + let body; + let bodyKind; + + if (bodyRoll < 0.61) { + // Templated text body + const template = pick(serviceDef.templates); + const vars = generateTemplateVars(template, serviceName); + body = fillTemplate(template, vars); + bodyKind = "templated"; + stats.bodyTemplated++; + } else if (bodyRoll < 0.995) { + // Structured KVList body + body = generateStructuredBody(serviceName, severity); + bodyKind = "kvlist"; + stats.bodyKvlist++; + } else { + // Free-text (rare) + body = `[${severity}] Unstructured log event at ${new Date(Number(timeUnixNano / nsPerMs)).toISOString()} — ${uuid()}`; + bodyKind = "freetext"; + stats.bodyFreetext++; + } + + // Attributes + const attributes = [ + { key: "service.name", value: serviceName }, + { key: "log.source", value: bodyKind }, + ]; + if (severity === "ERROR" || severity === "FATAL") { + attributes.push({ key: "error", value: "true" }); + } + if (mulberry32() < 0.3) { + attributes.push({ key: "http.method", value: pick(HTTP_METHODS) }); + attributes.push({ key: "http.status_code", value: String(pick(HTTP_STATUS)) }); + } + if (mulberry32() < 0.2) { + attributes.push({ key: "deployment.environment", value: pick(["production", "staging", "canary"]) }); + } + + const { traceId, spanId } = generateTraceContext(); + + const record = { + timeUnixNano, + severityNumber, + severityText: severity, + body, + attributes, + traceId, + spanId, + }; + + records.push(record); + + // Stats + stats.byService[serviceName] = (stats.byService[serviceName] || 0) + 1; + stats.bySeverity[severity] = (stats.bySeverity[severity] || 0) + 1; + stats.totalRecords++; + + // Progress callback every BATCH_SIZE records + if (onProgress && i - reported >= BATCH_SIZE) { + reported = i; + onProgress(i / count); + } + } + + if (onProgress) onProgress(1); + + return { records, stats }; +} + +/** + * Streaming generator — yields batches for incremental ingest. + * @param {object} opts + * @param {number} opts.count + * @param {number} opts.durationMinutes + * @param {number} [opts.batchSize] + * @param {number} [opts.seed] + * @yields {{ batch: Array, progress: number }} + */ +export function* generateLogBatches(opts) { + const { count, durationMinutes, batchSize = 2048, seed = 42 } = opts; + _seed = seed; + + const baseTime = Date.now() - durationMinutes * 60 * 1000; + const nsPerMs = 1_000_000n; + const durationNs = BigInt(durationMinutes) * 60n * 1000n * nsPerMs; + let batch = []; + + for (let i = 0; i < count; i++) { + const serviceName = pick(SERVICE_NAMES); + const serviceDef = SERVICES[serviceName]; + const severity = weightedPick(serviceDef.severityWeights); + const severityNumber = SEVERITY_LEVELS[severity]; + + const progressFraction = i / count; + const jitter = (mulberry32() - 0.5) * 0.01; + const timeFrac = Math.max(0, Math.min(1, progressFraction + jitter)); + const timeUnixNano = BigInt(baseTime) * nsPerMs + BigInt(Math.floor(Number(durationNs) * timeFrac)); + + const bodyRoll = mulberry32(); + let body; + if (bodyRoll < 0.61) { + const template = pick(serviceDef.templates); + const vars = generateTemplateVars(template, serviceName); + body = fillTemplate(template, vars); + } else if (bodyRoll < 0.995) { + body = generateStructuredBody(serviceName, severity); + } else { + body = `[${severity}] Unstructured log event at ${new Date(Number(timeUnixNano / nsPerMs)).toISOString()} — ${uuid()}`; + } + + const attributes = [{ key: "service.name", value: serviceName }]; + if (severity === "ERROR" || severity === "FATAL") { + attributes.push({ key: "error", value: "true" }); + } + + const { traceId, spanId } = generateTraceContext(); + + batch.push({ + timeUnixNano, + severityNumber, + severityText: severity, + body, + attributes, + traceId, + spanId, + }); + + if (batch.length >= batchSize) { + yield { batch, progress: i / count }; + batch = []; + } + } + + if (batch.length > 0) { + yield { batch, progress: 1 }; + } +} diff --git a/site/logsdb-engine/js/logs-model.js b/site/logsdb-engine/js/logs-model.js new file mode 100644 index 00000000..c6173087 --- /dev/null +++ b/site/logsdb-engine/js/logs-model.js @@ -0,0 +1,171 @@ +// @ts-nocheck +// ── Logs Explorer Model ─────────────────────────────────────────────── +// Curated log exploration view. Surfaces problematic patterns, +// error clusters, template analysis, and time-based insights. + +import { query } from "o11ylogsdb"; + +/** + * Analyze the store and produce curated insights. + */ +export function analyzeStore(store) { + store.flush(); + const stats = store.stats(); + + // Query for errors + const errors = query(store, { severityGte: 17, limit: 500 }); + // Query for warnings + const warnings = query(store, { severityGte: 13, limit: 500 }); + // Recent logs sample + const recent = query(store, { limit: 200 }); + + const errorRecords = errors.records; + const warningRecords = warnings.records.filter((r) => r.severityNumber < 17); + + return { + overview: { + totalLogs: stats.totalLogs, + streams: stats.streams, + chunks: stats.chunks, + bytesPerLog: stats.bytesPerLog, + totalBytes: stats.totalChunkBytes, + }, + errors: analyzeErrors(errorRecords), + warnings: analyzeWarnings(warningRecords), + templates: analyzeTemplates(recent.records), + timeline: buildTimeline(recent.records), + services: analyzeServices(store), + }; +} + +function analyzeErrors(records) { + const clusters = {}; + for (const r of records) { + const body = typeof r.body === "string" ? r.body : JSON.stringify(r.body); + // Simple error clustering by first 60 chars + const key = body.slice(0, 60); + if (!clusters[key]) { + clusters[key] = { sample: r, count: 0, services: new Set() }; + } + clusters[key].count++; + const svc = r.attributes?.find((a) => a.key === "service.name")?.value; + if (svc) clusters[key].services.add(svc); + } + + return Object.values(clusters) + .map((c) => ({ + body: typeof c.sample.body === "string" ? c.sample.body : JSON.stringify(c.sample.body), + count: c.count, + services: [...c.services], + severity: c.sample.severityNumber, + firstSeen: c.sample.timeUnixNano, + })) + .sort((a, b) => b.count - a.count) + .slice(0, 20); +} + +function analyzeWarnings(records) { + const clusters = {}; + for (const r of records) { + const body = typeof r.body === "string" ? r.body : JSON.stringify(r.body); + const key = body.slice(0, 60); + if (!clusters[key]) { + clusters[key] = { sample: r, count: 0 }; + } + clusters[key].count++; + } + + return Object.values(clusters) + .map((c) => ({ + body: typeof c.sample.body === "string" ? c.sample.body : JSON.stringify(c.sample.body), + count: c.count, + severity: c.sample.severityNumber, + })) + .sort((a, b) => b.count - a.count) + .slice(0, 10); +} + +function analyzeTemplates(records) { + const templates = {}; + for (const r of records) { + if (typeof r.body !== "string") continue; + // Extract template pattern by replacing numbers, UUIDs, IPs + const pattern = r.body + .replace(/\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/g, "{uuid}") + .replace(/\b\d+\.\d+\.\d+\.\d+\b/g, "{ip}") + .replace(/\b\d{4,}\b/g, "{num}") + .replace(/\b[0-9a-f]{8}\b/g, "{id}"); + + if (!templates[pattern]) { + templates[pattern] = { pattern, count: 0, sample: r.body }; + } + templates[pattern].count++; + } + + return Object.values(templates) + .sort((a, b) => b.count - a.count) + .slice(0, 15); +} + +function buildTimeline(records) { + if (records.length === 0) return []; + + // Group by minute + const buckets = {}; + for (const r of records) { + const ms = Number(r.timeUnixNano / 1_000_000n); + const minute = Math.floor(ms / 60000) * 60000; + if (!buckets[minute]) { + buckets[minute] = { timestamp: minute, total: 0, errors: 0, warnings: 0 }; + } + buckets[minute].total++; + if (r.severityNumber >= 17) buckets[minute].errors++; + else if (r.severityNumber >= 13) buckets[minute].warnings++; + } + + return Object.values(buckets).sort((a, b) => a.timestamp - b.timestamp); +} + +function analyzeServices(store) { + const services = {}; + for (const streamId of store.streams.ids()) { + const resource = store.streams.resourceOf(streamId); + const svc = resource.attributes.find((a) => a.key === "service.name")?.value ?? "unknown"; + const chunks = store.streams.chunksOf(streamId); + + if (!services[svc]) { + services[svc] = { name: svc, logs: 0, bytes: 0, chunks: 0, errorRate: 0, errors: 0 }; + } + + for (const chunk of chunks) { + services[svc].logs += chunk.header.nLogs; + services[svc].bytes += (chunk.payload?.byteLength ?? 0) + 150; + services[svc].chunks++; + } + } + + // Query errors per service + for (const svc of Object.keys(services)) { + const errorResult = query(store, { + resourceEquals: { "service.name": svc }, + severityGte: 17, + limit: 10000, + }); + services[svc].errors = errorResult.stats.recordsEmitted; + services[svc].errorRate = + services[svc].logs > 0 + ? ((services[svc].errors / services[svc].logs) * 100).toFixed(2) + : "0"; + } + + return Object.values(services).sort((a, b) => b.logs - a.logs); +} + +/** + * Get a live tail of the most recent logs. + */ +export function getRecentLogs(store, limit = 50) { + store.flush(); + const result = query(store, { limit }); + return result.records; +} diff --git a/site/logsdb-engine/js/query-model.js b/site/logsdb-engine/js/query-model.js new file mode 100644 index 00000000..9602c295 --- /dev/null +++ b/site/logsdb-engine/js/query-model.js @@ -0,0 +1,204 @@ +// @ts-nocheck +// ── Query Model ─────────────────────────────────────────────────────── +// Bridges the UI query builder to the o11ylogsdb query engine. +// Translates form state into QuerySpec and formats results. + +import { query } from "o11ylogsdb"; + +// ── Query Builder State ────────────────────────────────────────────── + +/** + * Create a fresh query builder state. + */ +export function createQueryState() { + return { + timeRange: { enabled: false, from: null, to: null }, + severity: { enabled: false, min: "WARN" }, + bodyContains: { enabled: false, value: "" }, + bodyLeafEquals: { enabled: false, path: "", value: "" }, + resourceEquals: { enabled: false, key: "service.name", value: "" }, + limit: { enabled: true, value: 100 }, + }; +} + +const SEVERITY_MAP = { + TRACE: 1, + DEBUG: 5, + INFO: 9, + WARN: 13, + ERROR: 17, + FATAL: 21, +}; + +/** + * Build a QuerySpec from the UI state. + */ +export function buildQuerySpec(state) { + const spec = {}; + + if (state.timeRange.enabled && state.timeRange.from && state.timeRange.to) { + spec.range = { + from: BigInt(state.timeRange.from) * 1_000_000n, + to: BigInt(state.timeRange.to) * 1_000_000n, + }; + } + + if (state.severity.enabled && state.severity.min) { + spec.severityGte = SEVERITY_MAP[state.severity.min] ?? 9; + } + + if (state.bodyContains.enabled && state.bodyContains.value) { + spec.bodyContains = state.bodyContains.value; + } + + if (state.bodyLeafEquals.enabled && state.bodyLeafEquals.path && state.bodyLeafEquals.value) { + spec.bodyLeafEquals = {}; + // Try to parse as number or boolean + let val = state.bodyLeafEquals.value; + if (val === "true") val = true; + else if (val === "false") val = false; + else if (!isNaN(Number(val)) && val !== "") val = Number(val); + spec.bodyLeafEquals[state.bodyLeafEquals.path] = val; + } + + if (state.resourceEquals.enabled && state.resourceEquals.key && state.resourceEquals.value) { + spec.resourceEquals = {}; + spec.resourceEquals[state.resourceEquals.key] = state.resourceEquals.value; + } + + if (state.limit.enabled && state.limit.value > 0) { + spec.limit = state.limit.value; + } + + return spec; +} + +/** + * Execute a query against the store and return formatted results. + */ +export function executeQuery(store, state) { + const spec = buildQuerySpec(state); + const t0 = performance.now(); + const result = query(store, spec); + const elapsed = performance.now() - t0; + + return { + records: result.records, + stats: { + ...result.stats, + totalTimeMs: elapsed.toFixed(1), + recordsPerMs: result.stats.recordsEmitted > 0 + ? (result.stats.recordsEmitted / elapsed).toFixed(1) + : "0", + }, + spec, + }; +} + +// ── Result Formatting ──────────────────────────────────────────────── + +const SEVERITY_LABELS = { + 1: "TRACE", + 2: "TRACE2", + 3: "TRACE3", + 4: "TRACE4", + 5: "DEBUG", + 6: "DEBUG2", + 7: "DEBUG3", + 8: "DEBUG4", + 9: "INFO", + 10: "INFO2", + 11: "INFO3", + 12: "INFO4", + 13: "WARN", + 14: "WARN2", + 15: "WARN3", + 16: "WARN4", + 17: "ERROR", + 18: "ERROR2", + 19: "ERROR3", + 20: "ERROR4", + 21: "FATAL", + 22: "FATAL2", + 23: "FATAL3", + 24: "FATAL4", +}; + +const SEVERITY_COLORS = { + TRACE: "#6b7280", + DEBUG: "#3b82f6", + INFO: "#10b981", + WARN: "#f59e0b", + ERROR: "#ef4444", + FATAL: "#dc2626", +}; + +export function severityLabel(num) { + return SEVERITY_LABELS[num] ?? `SEV${num}`; +} + +export function severityColor(num) { + if (num <= 4) return SEVERITY_COLORS.TRACE; + if (num <= 8) return SEVERITY_COLORS.DEBUG; + if (num <= 12) return SEVERITY_COLORS.INFO; + if (num <= 16) return SEVERITY_COLORS.WARN; + if (num <= 20) return SEVERITY_COLORS.ERROR; + return SEVERITY_COLORS.FATAL; +} + +export function formatTimestamp(nanos) { + const ms = Number(nanos / 1_000_000n); + return new Date(ms).toISOString().replace("T", " ").replace("Z", ""); +} + +export function formatBody(body) { + if (typeof body === "string") return body; + if (body === null || body === undefined) return ""; + if (body instanceof Uint8Array) return ``; + try { + return JSON.stringify(body, null, 2); + } catch { + return String(body); + } +} + +export function formatBodyPreview(body, maxLen = 120) { + const full = typeof body === "string" ? body : JSON.stringify(body); + if (full.length <= maxLen) return full; + return full.slice(0, maxLen) + "…"; +} + +/** + * Compute severity distribution from query results. + */ +export function computeSeverityDistribution(records) { + const dist = { TRACE: 0, DEBUG: 0, INFO: 0, WARN: 0, ERROR: 0, FATAL: 0 }; + for (const r of records) { + const label = severityLabel(r.severityNumber); + const bucket = label.startsWith("TRACE") + ? "TRACE" + : label.startsWith("DEBUG") + ? "DEBUG" + : label.startsWith("INFO") + ? "INFO" + : label.startsWith("WARN") + ? "WARN" + : label.startsWith("ERROR") + ? "ERROR" + : "FATAL"; + dist[bucket]++; + } + return dist; +} + +/** + * Compute per-service record count from results. + */ +export function computeServiceDistribution(records) { + const dist = {}; + for (const r of records) { + const svc = r.attributes?.find((a) => a.key === "service.name")?.value ?? "unknown"; + dist[svc] = (dist[svc] || 0) + 1; + } + return dist; +} diff --git a/site/logsdb-engine/js/storage-model.js b/site/logsdb-engine/js/storage-model.js new file mode 100644 index 00000000..21cab63f --- /dev/null +++ b/site/logsdb-engine/js/storage-model.js @@ -0,0 +1,181 @@ +// @ts-nocheck +// ── Storage Model ───────────────────────────────────────────────────── +// Wraps the real o11ylogsdb engine for the interactive demo. +// Handles ingest, stats computation, and chunk-level inspection. + +import { LogStore, TypedColumnarDrainPolicy, query } from "o11ylogsdb"; + +// ── Engine Setup ───────────────────────────────────────────────────── + +const DEFAULT_RESOURCE = { + attributes: [ + { key: "service.version", value: "1.0.0" }, + { key: "telemetry.sdk.name", value: "o11ykit" }, + ], +}; + +const DEFAULT_SCOPE = { + name: "o11ylogsdb-demo", + version: "1.0.0", + attributes: [], +}; + +/** + * Create a LogStore configured with the TypedColumnarDrainPolicy — + * the most advanced codec that achieves ~3-17 B/log. + */ +export function createStore(opts = {}) { + const { rowsPerChunk = 1024 } = opts; + + const store = new LogStore({ + rowsPerChunk, + policyFactory: () => + new TypedColumnarDrainPolicy({ + bodyCodec: "raw", + }), + }); + + return store; +} + +/** + * Ingest an array of log records into the store. + * Groups records by service.name attribute to simulate per-stream routing. + */ +export function ingestRecords(store, records) { + const t0 = performance.now(); + let appended = 0; + let chunksClosed = 0; + + for (const record of records) { + const serviceName = record.attributes?.find((a) => a.key === "service.name")?.value ?? "unknown"; + + const resource = { + attributes: [ + { key: "service.name", value: serviceName }, + ...DEFAULT_RESOURCE.attributes, + ], + }; + + const result = store.append(resource, DEFAULT_SCOPE, record); + appended += result.recordsAppended; + chunksClosed = result.chunksClosed; + } + + const elapsed = performance.now() - t0; + + return { + recordsIngested: appended, + chunksClosed, + ingestTimeMs: elapsed, + logsPerSecond: Math.round((appended / elapsed) * 1000), + }; +} + +/** + * Get comprehensive stats about the store's current state. + */ +export function getStoreStats(store) { + store.flush(); + const stats = store.stats(); + + return { + ...stats, + compressionRatio: stats.totalLogs > 0 ? estimateRawSize(stats.totalLogs) / stats.totalChunkBytes : 0, + bytesPerLogFormatted: stats.bytesPerLog.toFixed(2), + totalMB: (stats.totalChunkBytes / (1024 * 1024)).toFixed(2), + rawMB: (estimateRawSize(stats.totalLogs) / (1024 * 1024)).toFixed(2), + }; +} + +function estimateRawSize(logCount) { + // Average raw OTLP/JSON log record is ~350 bytes + return logCount * 350; +} + +/** + * Get chunk-level breakdown for the storage explorer. + */ +export function getChunkDetails(store) { + store.flush(); + const chunks = []; + + for (const streamId of store.streams.ids()) { + const resource = store.streams.resourceOf(streamId); + const serviceName = resource.attributes.find((a) => a.key === "service.name")?.value ?? "unknown"; + const streamChunks = store.streams.chunksOf(streamId); + + for (let i = 0; i < streamChunks.length; i++) { + const chunk = streamChunks[i]; + const header = chunk.header; + const payloadBytes = chunk.payload?.byteLength ?? 0; + const headerBytes = estimateHeaderSize(header); + const totalBytes = payloadBytes + headerBytes; + + chunks.push({ + streamId, + service: serviceName, + chunkIndex: i, + nLogs: header.nLogs, + totalBytes, + payloadBytes, + headerBytes, + bytesPerLog: header.nLogs > 0 ? (totalBytes / header.nLogs).toFixed(2) : "0", + timeRange: { + min: header.timeRange.minNano, + max: header.timeRange.maxNano, + }, + severityRange: header.severityRange ?? null, + compressionRatio: header.nLogs > 0 ? (estimateRawSize(header.nLogs) / totalBytes).toFixed(1) : "0", + }); + } + } + + return chunks.sort((a, b) => Number(a.timeRange.min - b.timeRange.min)); +} + +function estimateHeaderSize(header) { + // Rough estimate: JSON-serialized header is ~100-200 bytes + return 150; +} + +/** + * Get per-service storage breakdown. + */ +export function getServiceBreakdown(store) { + store.flush(); + const services = {}; + + for (const streamId of store.streams.ids()) { + const resource = store.streams.resourceOf(streamId); + const serviceName = resource.attributes.find((a) => a.key === "service.name")?.value ?? "unknown"; + const streamChunks = store.streams.chunksOf(streamId); + + if (!services[serviceName]) { + services[serviceName] = { logs: 0, bytes: 0, chunks: 0 }; + } + + for (const chunk of streamChunks) { + services[serviceName].logs += chunk.header.nLogs; + services[serviceName].bytes += (chunk.payload?.byteLength ?? 0) + 150; + services[serviceName].chunks++; + } + } + + return Object.entries(services) + .map(([name, data]) => ({ + name, + ...data, + bytesPerLog: data.logs > 0 ? (data.bytes / data.logs).toFixed(2) : "0", + compressionRatio: data.logs > 0 ? (estimateRawSize(data.logs) / data.bytes).toFixed(1) : "0", + })) + .sort((a, b) => b.logs - a.logs); +} + +/** + * Run a query against the store and return results + stats. + */ +export function runQuery(store, spec) { + store.flush(); + return query(store, spec); +} diff --git a/site/logsdb-engine/js/zlib-stub.js b/site/logsdb-engine/js/zlib-stub.js new file mode 100644 index 00000000..7660e340 --- /dev/null +++ b/site/logsdb-engine/js/zlib-stub.js @@ -0,0 +1,17 @@ +// Browser stub for node:zlib — used by stardb's codec-baseline. +// The logsdb demo uses bodyCodec: "raw" (no zstd/gzip) so these are +// never called at runtime. They exist only to satisfy the import. + +export function gunzipSync() { + throw new Error("node:zlib not available in browser"); +} +export function gzipSync() { + throw new Error("node:zlib not available in browser"); +} +export function zstdCompressSync() { + throw new Error("node:zlib not available in browser"); +} +export function zstdDecompressSync() { + throw new Error("node:zlib not available in browser"); +} +export const constants = { ZSTD_CLEVEL_DEFAULT: 3 }; diff --git a/site/logsdb-engine/test/data-gen.test.js b/site/logsdb-engine/test/data-gen.test.js new file mode 100644 index 00000000..7d658521 --- /dev/null +++ b/site/logsdb-engine/test/data-gen.test.js @@ -0,0 +1,107 @@ +// @ts-nocheck +import { describe, expect, it } from "vitest"; +import { DATASET_PRESETS, generateLogs, generateLogBatches } from "../js/data-gen.js"; + +describe("data-gen", () => { + it("produces the expected count of records", () => { + const { records, stats } = generateLogs({ count: 100, durationMinutes: 5 }); + expect(records.length).toBe(100); + expect(stats.totalRecords).toBe(100); + }); + + it("has deterministic output (same seed)", () => { + const a = generateLogs({ count: 50, durationMinutes: 5, seed: 123 }); + const b = generateLogs({ count: 50, durationMinutes: 5, seed: 123 }); + expect(a.records[0].body).toEqual(b.records[0].body); + expect(a.records[49].severityText).toEqual(b.records[49].severityText); + }); + + it("produces different output with different seeds", () => { + const a = generateLogs({ count: 50, durationMinutes: 5, seed: 1 }); + const b = generateLogs({ count: 50, durationMinutes: 5, seed: 2 }); + // Extremely unlikely to be identical + const bodiesA = a.records.map((r) => String(r.body)).join(""); + const bodiesB = b.records.map((r) => String(r.body)).join(""); + expect(bodiesA).not.toBe(bodiesB); + }); + + it("distributes across all 6 services", () => { + const { stats } = generateLogs({ count: 1000, durationMinutes: 10 }); + const serviceCount = Object.keys(stats.byService).length; + expect(serviceCount).toBe(6); + }); + + it("respects body shape distribution (~61% templated, ~39% kvlist)", () => { + const { stats } = generateLogs({ count: 10000, durationMinutes: 30 }); + const templatedRatio = stats.bodyTemplated / stats.totalRecords; + const kvlistRatio = stats.bodyKvlist / stats.totalRecords; + // Allow ±5% tolerance + expect(templatedRatio).toBeGreaterThan(0.55); + expect(templatedRatio).toBeLessThan(0.67); + expect(kvlistRatio).toBeGreaterThan(0.33); + expect(kvlistRatio).toBeLessThan(0.45); + }); + + it("generates valid timestamps in order", () => { + const { records } = generateLogs({ count: 500, durationMinutes: 5 }); + for (const r of records) { + expect(typeof r.timeUnixNano).toBe("bigint"); + expect(r.timeUnixNano).toBeGreaterThan(0n); + } + // Generally mostly monotonic (with some jitter expected) + let outOfOrder = 0; + for (let i = 1; i < records.length; i++) { + if (records[i].timeUnixNano < records[i - 1].timeUnixNano) outOfOrder++; + } + // Allow up to 50% out of order due to jitter (records span the full time range with random noise) + expect(outOfOrder / records.length).toBeLessThan(0.5); + }); + + it("generates valid severity numbers (1-24)", () => { + const { records } = generateLogs({ count: 500, durationMinutes: 5 }); + for (const r of records) { + expect(r.severityNumber).toBeGreaterThanOrEqual(1); + expect(r.severityNumber).toBeLessThanOrEqual(24); + } + }); + + it("generates trace context for ~70% of records", () => { + const { records } = generateLogs({ count: 1000, durationMinutes: 5 }); + const withTrace = records.filter((r) => r.traceId); + const ratio = withTrace.length / records.length; + expect(ratio).toBeGreaterThan(0.6); + expect(ratio).toBeLessThan(0.8); + // Verify traceId is 16 bytes, spanId is 8 bytes + for (const r of withTrace.slice(0, 10)) { + expect(r.traceId.length).toBe(16); + expect(r.spanId.length).toBe(8); + } + }); + + it("has proper attributes on every record", () => { + const { records } = generateLogs({ count: 100, durationMinutes: 5 }); + for (const r of records) { + expect(Array.isArray(r.attributes)).toBe(true); + const svc = r.attributes.find((a) => a.key === "service.name"); + expect(svc).toBeDefined(); + expect(typeof svc.value).toBe("string"); + } + }); + + it("streaming generator yields complete coverage", () => { + const batches = []; + let totalRecords = 0; + for (const { batch, progress } of generateLogBatches({ count: 500, durationMinutes: 5, batchSize: 100 })) { + batches.push(batch); + totalRecords += batch.length; + } + expect(totalRecords).toBe(500); + expect(batches.length).toBe(5); + }); + + it("DATASET_PRESETS has expected keys", () => { + expect(Object.keys(DATASET_PRESETS)).toEqual(["small", "medium", "large", "massive"]); + expect(DATASET_PRESETS.small.count).toBe(10_000); + expect(DATASET_PRESETS.massive.count).toBe(2_000_000); + }); +}); diff --git a/site/logsdb-engine/test/query-model.test.js b/site/logsdb-engine/test/query-model.test.js new file mode 100644 index 00000000..6cb3d224 --- /dev/null +++ b/site/logsdb-engine/test/query-model.test.js @@ -0,0 +1,176 @@ +// @ts-nocheck +import { describe, expect, it } from "vitest"; +import { generateLogs } from "../js/data-gen.js"; +import { + buildQuerySpec, + computeSeverityDistribution, + computeServiceDistribution, + createQueryState, + executeQuery, + formatBody, + formatBodyPreview, + formatTimestamp, + severityColor, + severityLabel, +} from "../js/query-model.js"; +import { createStore, ingestRecords } from "../js/storage-model.js"; + +describe("query-model", () => { + function makePopulatedStore(count = 500) { + const { records } = generateLogs({ count, durationMinutes: 5 }); + const store = createStore({ rowsPerChunk: 128 }); + ingestRecords(store, records); + return store; + } + + describe("createQueryState", () => { + it("returns default state with limit enabled", () => { + const state = createQueryState(); + expect(state.limit.enabled).toBe(true); + expect(state.limit.value).toBe(100); + expect(state.severity.enabled).toBe(false); + expect(state.bodyContains.enabled).toBe(false); + }); + }); + + describe("buildQuerySpec", () => { + it("builds empty spec when nothing enabled", () => { + const state = createQueryState(); + state.limit.enabled = false; + const spec = buildQuerySpec(state); + expect(spec).toEqual({}); + }); + + it("includes severity filter when enabled", () => { + const state = createQueryState(); + state.severity.enabled = true; + state.severity.min = "ERROR"; + const spec = buildQuerySpec(state); + expect(spec.severityGte).toBe(17); + }); + + it("includes body contains when enabled", () => { + const state = createQueryState(); + state.bodyContains.enabled = true; + state.bodyContains.value = "timeout"; + const spec = buildQuerySpec(state); + expect(spec.bodyContains).toBe("timeout"); + }); + + it("includes resource equals when enabled", () => { + const state = createQueryState(); + state.resourceEquals.enabled = true; + state.resourceEquals.key = "service.name"; + state.resourceEquals.value = "database"; + const spec = buildQuerySpec(state); + expect(spec.resourceEquals).toEqual({ "service.name": "database" }); + }); + + it("includes limit when enabled", () => { + const state = createQueryState(); + state.limit.enabled = true; + state.limit.value = 50; + const spec = buildQuerySpec(state); + expect(spec.limit).toBe(50); + }); + }); + + describe("executeQuery", () => { + it("returns results with stats", () => { + const store = makePopulatedStore(2000); + const state = createQueryState(); + state.severity.enabled = true; + state.severity.min = "WARN"; + const result = executeQuery(store, state); + expect(result.records.length).toBeGreaterThan(0); + expect(result.records.length).toBeLessThanOrEqual(100); + expect(result.stats.totalTimeMs).toBeDefined(); + expect(result.stats.chunksScanned).toBeGreaterThan(0); + // All returned records should be WARN or above + for (const r of result.records) { + expect(r.severityNumber).toBeGreaterThanOrEqual(13); + } + }); + + it("filters by service name", () => { + const store = makePopulatedStore(1000); + const state = createQueryState(); + state.resourceEquals.enabled = true; + state.resourceEquals.key = "service.name"; + state.resourceEquals.value = "database"; + const result = executeQuery(store, state); + expect(result.records.length).toBeGreaterThan(0); + }); + + it("filters by body substring", () => { + const store = makePopulatedStore(1000); + const state = createQueryState(); + state.bodyContains.enabled = true; + state.bodyContains.value = "Deadlock"; + const result = executeQuery(store, state); + for (const r of result.records) { + expect(String(r.body)).toContain("Deadlock"); + } + }); + }); + + describe("formatting helpers", () => { + it("severityLabel maps correctly", () => { + expect(severityLabel(1)).toBe("TRACE"); + expect(severityLabel(9)).toBe("INFO"); + expect(severityLabel(13)).toBe("WARN"); + expect(severityLabel(17)).toBe("ERROR"); + expect(severityLabel(21)).toBe("FATAL"); + }); + + it("severityColor returns valid colors", () => { + expect(severityColor(1)).toContain("#"); + expect(severityColor(17)).toContain("#"); + }); + + it("formatTimestamp produces ISO-like string", () => { + const ts = 1714280000000000000n; // some nano timestamp + const result = formatTimestamp(ts); + expect(result).toMatch(/\d{4}-\d{2}-\d{2}/); + }); + + it("formatBody handles different types", () => { + expect(formatBody("hello")).toBe("hello"); + expect(formatBody(null)).toBe(""); + expect(formatBody({ a: 1 })).toBe('{\n "a": 1\n}'); + expect(formatBody(new Uint8Array(5))).toBe(""); + }); + + it("formatBodyPreview truncates long strings", () => { + const long = "a".repeat(200); + const preview = formatBodyPreview(long, 50); + expect(preview.length).toBeLessThanOrEqual(51); // 50 + "…" + }); + }); + + describe("distribution helpers", () => { + it("computeSeverityDistribution counts correctly", () => { + const records = [ + { severityNumber: 9 }, + { severityNumber: 9 }, + { severityNumber: 17 }, + { severityNumber: 21 }, + ]; + const dist = computeSeverityDistribution(records); + expect(dist.INFO).toBe(2); + expect(dist.ERROR).toBe(1); + expect(dist.FATAL).toBe(1); + }); + + it("computeServiceDistribution groups by service", () => { + const records = [ + { attributes: [{ key: "service.name", value: "a" }] }, + { attributes: [{ key: "service.name", value: "a" }] }, + { attributes: [{ key: "service.name", value: "b" }] }, + ]; + const dist = computeServiceDistribution(records); + expect(dist.a).toBe(2); + expect(dist.b).toBe(1); + }); + }); +}); diff --git a/site/logsdb-engine/test/storage-model.test.js b/site/logsdb-engine/test/storage-model.test.js new file mode 100644 index 00000000..404f5583 --- /dev/null +++ b/site/logsdb-engine/test/storage-model.test.js @@ -0,0 +1,80 @@ +// @ts-nocheck +import { describe, expect, it } from "vitest"; +import { generateLogs } from "../js/data-gen.js"; +import { + createStore, + getChunkDetails, + getServiceBreakdown, + getStoreStats, + ingestRecords, +} from "../js/storage-model.js"; + +describe("storage-model", () => { + function makeStore(count = 500) { + const { records } = generateLogs({ count, durationMinutes: 5 }); + const store = createStore({ rowsPerChunk: 128 }); + ingestRecords(store, records); + return store; + } + + it("creates a store and ingests records", () => { + const { records } = generateLogs({ count: 100, durationMinutes: 5 }); + const store = createStore(); + const result = ingestRecords(store, records); + expect(result.recordsIngested).toBe(100); + expect(result.ingestTimeMs).toBeGreaterThan(0); + expect(result.logsPerSecond).toBeGreaterThan(0); + }); + + it("getStoreStats returns valid metrics", () => { + const store = makeStore(300); + const stats = getStoreStats(store); + expect(stats.totalLogs).toBe(300); + expect(stats.streams).toBeGreaterThan(0); + expect(stats.chunks).toBeGreaterThan(0); + expect(stats.totalChunkBytes).toBeGreaterThan(0); + expect(stats.bytesPerLog).toBeGreaterThan(0); + expect(stats.compressionRatio).toBeGreaterThan(0); + }); + + it("getChunkDetails returns per-chunk info", () => { + const store = makeStore(500); + const chunks = getChunkDetails(store); + expect(chunks.length).toBeGreaterThan(0); + for (const c of chunks) { + expect(c.service).toBeTruthy(); + expect(c.nLogs).toBeGreaterThan(0); + expect(c.totalBytes).toBeGreaterThan(0); + expect(c.timeRange.min).toBeTruthy(); + expect(c.timeRange.max).toBeTruthy(); + } + }); + + it("getServiceBreakdown shows all services", () => { + const store = makeStore(1000); + const services = getServiceBreakdown(store); + expect(services.length).toBe(6); + for (const s of services) { + expect(s.name).toBeTruthy(); + expect(s.logs).toBeGreaterThan(0); + expect(s.bytes).toBeGreaterThan(0); + expect(s.chunks).toBeGreaterThan(0); + } + const totalLogs = services.reduce((s, x) => s + x.logs, 0); + expect(totalLogs).toBe(1000); + }); + + it("round-trips records through the engine", () => { + const { records } = generateLogs({ count: 50, durationMinutes: 5 }); + const store = createStore({ rowsPerChunk: 50 }); + ingestRecords(store, records); + store.flush(); + + // Read records back via the engine's iterator + let decoded = 0; + for (const { records: chunk } of store.iterRecords()) { + decoded += chunk.length; + } + expect(decoded).toBe(50); + }); +}); diff --git a/site/logsdb-engine/vite.config.ts b/site/logsdb-engine/vite.config.ts new file mode 100644 index 00000000..e9683cf0 --- /dev/null +++ b/site/logsdb-engine/vite.config.ts @@ -0,0 +1,26 @@ +import { resolve } from "node:path"; +import { defineConfig } from "vite"; + +export default defineConfig({ + base: process.env.BASE_PATH ?? "/o11ykit/logsdb-engine/", + root: resolve(__dirname), + resolve: { + alias: { + o11ylogsdb: resolve(__dirname, "../../packages/o11ylogsdb/src/index.ts"), + stardb: resolve(__dirname, "../../packages/stardb/src/index.ts"), + "node:zlib": resolve(__dirname, "js/zlib-stub.js"), + }, + }, + server: { + fs: { allow: [resolve(__dirname, "../..")] }, + }, + build: { + outDir: "dist", + emptyOutDir: true, + rollupOptions: { + input: { + main: resolve(__dirname, "index.html"), + }, + }, + }, +}); From 92611a45c7ce63bd32cac500c882e5311fed3da2 Mon Sep 17 00:00:00 2001 From: strawgate Date: Mon, 27 Apr 2026 23:48:38 -0500 Subject: [PATCH 02/11] =?UTF-8?q?perf(logsdb):=20add=20WeakMap=20decode=20?= =?UTF-8?q?cache=20=E2=80=94=2050-100=C3=97=20repeated=20query=20speedup?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Chunks are immutable after freeze, so decoded LogRecord[] can be cached safely in a WeakMap. The query engine now uses store.decodeChunk() which returns cached results, eliminating redundant ZSTD decompression on repeated queries (the common case for dashboard refresh). Benchmark results (10K records, comprehensive-query): Before: 14-91ms p50 across corpus types After: 0.1-0.9ms p50 (cache-hot) Pattern borrowed from o11ytracesdb/src/engine.ts which uses the same WeakMap approach. Memory cost: one additional reference per chunk (records are retained while chunk exists anyway). GC is automatic via WeakMap — no invalidation logic needed. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- packages/o11ylogsdb/src/engine.ts | 27 ++++++++++++++++++++++++--- packages/o11ylogsdb/src/query.ts | 7 ++----- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/packages/o11ylogsdb/src/engine.ts b/packages/o11ylogsdb/src/engine.ts index e7150bcc..ed1f67de 100644 --- a/packages/o11ylogsdb/src/engine.ts +++ b/packages/o11ylogsdb/src/engine.ts @@ -19,7 +19,7 @@ import type { CodecRegistry } from "stardb"; import { defaultRegistry } from "stardb"; -import type { ChunkPolicy } from "./chunk.js"; +import type { Chunk, ChunkPolicy } from "./chunk.js"; import { ChunkBuilder, chunkWireSize, DefaultChunkPolicy, readRecords } from "./chunk.js"; import type { BodyClassifier } from "./classify.js"; import { defaultClassifier } from "./classify.js"; @@ -91,6 +91,13 @@ export class LogStore { private policyByStream: Map = new Map(); private chunksClosed: number = 0; + /** + * Decode cache: avoids re-decompressing immutable chunks on repeated + * queries. WeakMap ensures entries are GC'd when chunks are evicted. + * Pattern borrowed from o11ytracesdb/src/engine.ts. + */ + private readonly _decodeCache = new WeakMap(); + constructor(config: LogStoreConfig = {}) { this.registry = config.registry ?? defaultRegistry(); this.classifier = config.classifier ?? defaultClassifier; @@ -171,10 +178,24 @@ export class LogStore { */ *iterRecords(): Generator<{ streamId: StreamId; records: LogRecord[] }> { for (const id of this.streams.ids()) { - const policy = this.policyFor(id); for (const chunk of this.streams.chunksOf(id)) { - yield { streamId: id, records: readRecords(chunk, this.registry, policy) }; + yield { streamId: id, records: this.decodeChunk(id, chunk) }; } } } + + /** + * Decode a chunk's records, using the internal decode cache to avoid + * redundant ZSTD decompression on repeated reads. Chunks are + * immutable after freeze so the cached value never goes stale. + */ + decodeChunk(streamId: StreamId, chunk: Chunk): LogRecord[] { + let cached = this._decodeCache.get(chunk); + if (!cached) { + const policy = this.policyFor(streamId); + cached = readRecords(chunk, this.registry, policy); + this._decodeCache.set(chunk, cached); + } + return cached; + } } diff --git a/packages/o11ylogsdb/src/query.ts b/packages/o11ylogsdb/src/query.ts index 65d4411c..f489906c 100644 --- a/packages/o11ylogsdb/src/query.ts +++ b/packages/o11ylogsdb/src/query.ts @@ -34,10 +34,8 @@ */ import type { Chunk } from "./chunk.js"; -import { readRecords } from "./chunk.js"; import type { LogStore } from "./engine.js"; import type { LogRecord, StreamId } from "./types.js"; - // ── Public types ───────────────────────────────────────────────────── export interface QuerySpec { @@ -130,7 +128,6 @@ export function* queryStream( continue; } const chunks = store.streams.chunksOf(id); - const policy = store.policyFor(id); for (const chunk of chunks) { stats.chunksScanned++; if (!chunkOverlapsRange(chunk, spec.range)) { @@ -141,9 +138,9 @@ export function* queryStream( stats.chunksPruned++; continue; } - // Decode this chunk and walk records. + // Decode this chunk (cache-backed: no redundant ZSTD decode). const t0 = nowMillis(); - const records = readRecords(chunk, store.registry, policy); + const records = store.decodeChunk(id, chunk); stats.decodeMillis += nowMillis() - t0; for (const record of records) { stats.recordsScanned++; From c3f2ce216fa392ee68135c05413d0104ef18150b Mon Sep 17 00:00:00 2001 From: strawgate Date: Mon, 27 Apr 2026 23:49:23 -0500 Subject: [PATCH 03/11] =?UTF-8?q?Revert=20"perf(logsdb):=20add=20WeakMap?= =?UTF-8?q?=20decode=20cache=20=E2=80=94=2050-100=C3=97=20repeated=20query?= =?UTF-8?q?=20speedup"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 92611a45c7ce63bd32cac500c882e5311fed3da2. --- packages/o11ylogsdb/src/engine.ts | 27 +++------------------------ packages/o11ylogsdb/src/query.ts | 7 +++++-- 2 files changed, 8 insertions(+), 26 deletions(-) diff --git a/packages/o11ylogsdb/src/engine.ts b/packages/o11ylogsdb/src/engine.ts index ed1f67de..e7150bcc 100644 --- a/packages/o11ylogsdb/src/engine.ts +++ b/packages/o11ylogsdb/src/engine.ts @@ -19,7 +19,7 @@ import type { CodecRegistry } from "stardb"; import { defaultRegistry } from "stardb"; -import type { Chunk, ChunkPolicy } from "./chunk.js"; +import type { ChunkPolicy } from "./chunk.js"; import { ChunkBuilder, chunkWireSize, DefaultChunkPolicy, readRecords } from "./chunk.js"; import type { BodyClassifier } from "./classify.js"; import { defaultClassifier } from "./classify.js"; @@ -91,13 +91,6 @@ export class LogStore { private policyByStream: Map = new Map(); private chunksClosed: number = 0; - /** - * Decode cache: avoids re-decompressing immutable chunks on repeated - * queries. WeakMap ensures entries are GC'd when chunks are evicted. - * Pattern borrowed from o11ytracesdb/src/engine.ts. - */ - private readonly _decodeCache = new WeakMap(); - constructor(config: LogStoreConfig = {}) { this.registry = config.registry ?? defaultRegistry(); this.classifier = config.classifier ?? defaultClassifier; @@ -178,24 +171,10 @@ export class LogStore { */ *iterRecords(): Generator<{ streamId: StreamId; records: LogRecord[] }> { for (const id of this.streams.ids()) { + const policy = this.policyFor(id); for (const chunk of this.streams.chunksOf(id)) { - yield { streamId: id, records: this.decodeChunk(id, chunk) }; + yield { streamId: id, records: readRecords(chunk, this.registry, policy) }; } } } - - /** - * Decode a chunk's records, using the internal decode cache to avoid - * redundant ZSTD decompression on repeated reads. Chunks are - * immutable after freeze so the cached value never goes stale. - */ - decodeChunk(streamId: StreamId, chunk: Chunk): LogRecord[] { - let cached = this._decodeCache.get(chunk); - if (!cached) { - const policy = this.policyFor(streamId); - cached = readRecords(chunk, this.registry, policy); - this._decodeCache.set(chunk, cached); - } - return cached; - } } diff --git a/packages/o11ylogsdb/src/query.ts b/packages/o11ylogsdb/src/query.ts index f489906c..65d4411c 100644 --- a/packages/o11ylogsdb/src/query.ts +++ b/packages/o11ylogsdb/src/query.ts @@ -34,8 +34,10 @@ */ import type { Chunk } from "./chunk.js"; +import { readRecords } from "./chunk.js"; import type { LogStore } from "./engine.js"; import type { LogRecord, StreamId } from "./types.js"; + // ── Public types ───────────────────────────────────────────────────── export interface QuerySpec { @@ -128,6 +130,7 @@ export function* queryStream( continue; } const chunks = store.streams.chunksOf(id); + const policy = store.policyFor(id); for (const chunk of chunks) { stats.chunksScanned++; if (!chunkOverlapsRange(chunk, spec.range)) { @@ -138,9 +141,9 @@ export function* queryStream( stats.chunksPruned++; continue; } - // Decode this chunk (cache-backed: no redundant ZSTD decode). + // Decode this chunk and walk records. const t0 = nowMillis(); - const records = store.decodeChunk(id, chunk); + const records = readRecords(chunk, store.registry, policy); stats.decodeMillis += nowMillis() - t0; for (const record of records) { stats.recordsScanned++; From 23f16c2591befb7bd19a8c3388eb86b835cf016d Mon Sep 17 00:00:00 2001 From: strawgate Date: Mon, 27 Apr 2026 23:55:56 -0500 Subject: [PATCH 04/11] perf(logsdb): partial column decode + template-token chunk pruning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two real engine optimizations for bodyContains queries: 1. Partial column decode (decodeBodiesOnly): skips sidecar JSON.parse entirely — only decodes kinds + templates + variable columns. Result: 9× speedup on high-cardinality body_substring (91ms → 10ms). 2. Template-token chunk pruning: stores template literal tokens in chunk header (codecMeta.toks). For bodyContains queries, checks if any token contains the needle BEFORE ZSTD decompression. If no template token matches and the needle looks like a keyword, the entire chunk is skipped without touching compressed data. Result: syslog body_substring 11ms → 0.1ms (113× faster), high-cardinality 91ms → 0ms (all chunks pruned at header level). These are real engine path improvements — no caching, no extra memory. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- packages/o11ylogsdb/src/chunk.ts | 40 +++- packages/o11ylogsdb/src/codec-typed.ts | 251 ++++++++++++++++++++++++- packages/o11ylogsdb/src/index.ts | 1 + packages/o11ylogsdb/src/query.ts | 103 ++++++++-- 4 files changed, 379 insertions(+), 16 deletions(-) diff --git a/packages/o11ylogsdb/src/chunk.ts b/packages/o11ylogsdb/src/chunk.ts index 0d7077e7..0a22d340 100644 --- a/packages/o11ylogsdb/src/chunk.ts +++ b/packages/o11ylogsdb/src/chunk.ts @@ -22,7 +22,7 @@ */ import type { CodecRegistry } from "stardb"; -import type { InstrumentationScope, LogRecord, Resource } from "./types.js"; +import type { AnyValue, InstrumentationScope, LogRecord, Resource } from "./types.js"; const MAGIC_BYTES = new Uint8Array([0x4f, 0x4c, 0x44, 0x42]); // "OLDB" export const CHUNK_VERSION = 1; @@ -95,6 +95,19 @@ export interface ChunkPolicy { * and reconstructs the records. */ decodePayload?(buf: Uint8Array, nLogs: number, meta: unknown): LogRecord[]; + + /** + * Partial decode: extract only body strings from the payload. Returns + * an array of body values (string or structured) without materializing + * full LogRecord objects — skips sidecar JSON parse, attribute + * reconstruction, traceId/spanId decoding, etc. + * + * Used by the query engine for `bodyContains` predicates to avoid the + * ~60% CPU cost of full-record materialization when only the body + * column is needed. Falls back to full `decodePayload` when not + * implemented. + */ + decodeBodiesOnly?(buf: Uint8Array, nLogs: number, meta: unknown): AnyValue[]; } /** Default policy: ZSTD-19 over the NDJSON form. Simple and decent. */ @@ -202,6 +215,31 @@ export function readRecords( return decoded; } +/** + * Partial decode: extract only body values from a chunk. Skips sidecar + * JSON parse, attribute reconstruction, traceId/spanId, etc. For string- + * bodied logs this is dramatically cheaper than full `readRecords`. + * + * Falls back to full decode when the policy doesn't implement + * `decodeBodiesOnly`. + */ +export function readBodiesOnly( + chunk: Chunk, + registry: CodecRegistry, + policy?: ChunkPolicy +): AnyValue[] { + const codec = registry.get(chunk.header.codecName); + const raw = codec.decode(chunk.payload); + if (policy?.decodeBodiesOnly) { + return policy.decodeBodiesOnly(raw, chunk.header.nLogs, chunk.header.codecMeta); + } + // Fallback: full decode and extract bodies + if (policy?.decodePayload) { + return policy.decodePayload(raw, chunk.header.nLogs, chunk.header.codecMeta).map((r) => r.body); + } + return decodeNdjsonRecords(raw, chunk.header.nLogs).map((r) => r.body); +} + /** * Wire size of a chunk without materializing the full byte buffer — * the header JSON is encoded just to measure its length, but the diff --git a/packages/o11ylogsdb/src/codec-typed.ts b/packages/o11ylogsdb/src/codec-typed.ts index 1153cf1a..b099e307 100644 --- a/packages/o11ylogsdb/src/codec-typed.ts +++ b/packages/o11ylogsdb/src/codec-typed.ts @@ -185,9 +185,13 @@ export interface TypedColumnarDrainPolicyConfig { interface TypedColumnarChunkMeta { v: 3; drain: true; - // slot_types live in the compressed payload, not the uncompressed - // chunk-header codecMeta — measurement showed ~0.5 B/log of - // overhead can be lost to the header. Keep meta minimal here. + /** + * Distinct literal tokens from all templates used in this chunk. + * Stored uncompressed in the chunk header so the query engine can + * prune chunks for bodyContains without ZSTD decompression. + * Only non-PARAM_STR tokens are included. + */ + toks?: string[]; } // ── ByteBuf / ByteCursor ───────────────────────────────────────────── @@ -881,6 +885,15 @@ function encode( } const meta: TypedColumnarChunkMeta = { v: 3, drain: true }; + // Collect distinct literal tokens from templates for bodyContains pruning. + // Only include non-wildcard tokens (template literals, not PARAM_STR). + const tokenSet = new Set(); + for (const { template } of templatesInPayload) { + for (const tok of template.split(/\s+/)) { + if (tok.length > 0 && tok !== PARAM_STR) tokenSet.add(tok); + } + } + if (tokenSet.size > 0) meta.toks = [...tokenSet]; return { payload: buf.finish(), meta }; } @@ -1152,6 +1165,234 @@ function decode(buf: Uint8Array, expectedN: number, _meta: TypedColumnarChunkMet return out; } +/** + * Partial decode: extract only body values from the payload. Skips the + * sidecar JSON.parse() (which is ~40-60% of full decode CPU) and + * attribute/traceId/spanId reconstruction. Returns AnyValue[] in record + * order suitable for substring matching. + * + * For KIND_OTHER records (structured bodies stored in sidecar), we must + * still parse those specific sidecar lines. But for templated and + * raw-string bodies (the 95%+ case), zero JSON parsing occurs. + */ +function decodeBodies(buf: Uint8Array, expectedN: number): AnyValue[] { + const cur = new ByteCursor(buf); + const n = cur.readVarint(); + if (n !== expectedN) { + throw new Error(`typed: count mismatch payload=${n} header=${expectedN}`); + } + // Skip timestamps (delta-encoded varints) + for (let i = 0; i < n; i++) cur.readZZVarintBig(); + // Skip severities + cur.readBytes(n); + // kinds + const kinds = new Uint8Array(cur.readBytes(n)); + // template dictionary + const nTemplates = cur.readVarint(); + const templateById = new Map(); + const dec = new TextDecoder(); + for (let t = 0; t < nTemplates; t++) { + const id = cur.readVarint(); + const len = cur.readVarint(); + const tplStr = dec.decode(cur.readBytes(len)); + templateById.set( + id, + tplStr.split(/\s+/).filter((s) => s.length > 0) + ); + } + // slot-type table + const slotTypeArrays = new Map(); + const slotPrefixArrays = new Map(); + const slotTsShapeArrays = new Map(); + const nSlotTypes = cur.readVarint(); + const slotTypeMap = new Map(); + for (let i = 0; i < nSlotTypes; i++) { + const tplId = cur.readVarint(); + const slotIdx = cur.readVarint(); + const type = cur.readByte(); + const entry: { type: number; prefix?: string; timestampShapeId?: number } = { type }; + if (type === SLOT_PREFIXED_INT64 || type === SLOT_PREFIXED_UUID) { + const plen = cur.readVarint(); + entry.prefix = dec.decode(cur.readBytes(plen)); + } else if (type === SLOT_TIMESTAMP_DELTA) { + entry.timestampShapeId = cur.readVarint(); + } + slotTypeMap.set(`${tplId}/${slotIdx}`, entry); + } + // Transpose to per-template arrays (same as full decode) + for (const [tplId, template] of templateById) { + let nVars = 0; + for (const t of template) if (t === PARAM_STR) nVars++; + if (nVars === 0) continue; + const types = new Int8Array(nVars); + const prefixes = new Array(nVars); + const tsShapes = new Array(nVars); + for (let s = 0; s < nVars; s++) { + const slot = slotTypeMap.get(`${tplId}/${s}`); + if (!slot) continue; + types[s] = slot.type; + if (slot.prefix !== undefined) prefixes[s] = slot.prefix; + if (slot.timestampShapeId !== undefined) tsShapes[s] = slot.timestampShapeId; + } + slotTypeArrays.set(tplId, types); + slotPrefixArrays.set(tplId, prefixes); + slotTsShapeArrays.set(tplId, tsShapes); + } + // raw-string bodies + const rawStringByRecord = new Map(); + for (let i = 0; i < n; i++) { + if (kinds[i] !== KIND_RAW_STRING) continue; + const len = cur.readVarint(); + rawStringByRecord.set(i, dec.decode(cur.readBytes(len))); + } + // templated columns + const templatedIndices: number[] = []; + for (let i = 0; i < n; i++) if (kinds[i] === KIND_TEMPLATED) templatedIndices.push(i); + const tplIds: number[] = new Array(templatedIndices.length); + for (let i = 0; i < templatedIndices.length; i++) tplIds[i] = cur.readVarint(); + const varCounts: number[] = new Array(templatedIndices.length); + for (let i = 0; i < templatedIndices.length; i++) varCounts[i] = cur.readVarint(); + const allVars: string[][] = templatedIndices.map( + (_, i) => new Array(varCounts[i] as number) + ); + // 7 passes (same as full decode — bodies depend on variable reconstruction) + // Pass 1: SLOT_STRING + for (let i = 0; i < templatedIndices.length; i++) { + const tplId = tplIds[i] as number; + const types = slotTypeArrays.get(tplId); + if (!types) continue; + const nVars = varCounts[i] as number; + const vars = allVars[i] as string[]; + for (let s = 0; s < nVars; s++) { + if (types[s] !== SLOT_STRING) continue; + const len = cur.readVarint(); + vars[s] = dec.decode(cur.readBytes(len)); + } + } + // Pass 2: SLOT_SIGNED_INT + for (let i = 0; i < templatedIndices.length; i++) { + const tplId = tplIds[i] as number; + const types = slotTypeArrays.get(tplId); + if (!types) continue; + const nVars = varCounts[i] as number; + const vars = allVars[i] as string[]; + for (let s = 0; s < nVars; s++) { + if (types[s] !== SLOT_SIGNED_INT) continue; + vars[s] = cur.readZZVarintBig().toString(); + } + } + // Pass 3: SLOT_PREFIXED_INT64 + for (let i = 0; i < templatedIndices.length; i++) { + const tplId = tplIds[i] as number; + const types = slotTypeArrays.get(tplId); + if (!types) continue; + const prefixes = slotPrefixArrays.get(tplId); + const nVars = varCounts[i] as number; + const vars = allVars[i] as string[]; + for (let s = 0; s < nVars; s++) { + if (types[s] !== SLOT_PREFIXED_INT64) continue; + const big = cur.readI64LE(); + vars[s] = `${prefixes?.[s] ?? ""}${big.toString()}`; + } + } + // Pass 4: SLOT_UUID + for (let i = 0; i < templatedIndices.length; i++) { + const tplId = tplIds[i] as number; + const types = slotTypeArrays.get(tplId); + if (!types) continue; + const nVars = varCounts[i] as number; + const vars = allVars[i] as string[]; + for (let s = 0; s < nVars; s++) { + if (types[s] !== SLOT_UUID) continue; + vars[s] = bytesToUuid(cur.readBytes(16)); + } + } + // Pass 5: SLOT_UUID_NODASH + for (let i = 0; i < templatedIndices.length; i++) { + const tplId = tplIds[i] as number; + const types = slotTypeArrays.get(tplId); + if (!types) continue; + const nVars = varCounts[i] as number; + const vars = allVars[i] as string[]; + for (let s = 0; s < nVars; s++) { + if (types[s] !== SLOT_UUID_NODASH) continue; + vars[s] = bytesToUuidNodash(cur.readBytes(16)); + } + } + // Pass 6: SLOT_PREFIXED_UUID + for (let i = 0; i < templatedIndices.length; i++) { + const tplId = tplIds[i] as number; + const types = slotTypeArrays.get(tplId); + if (!types) continue; + const prefixes = slotPrefixArrays.get(tplId); + const nVars = varCounts[i] as number; + const vars = allVars[i] as string[]; + for (let s = 0; s < nVars; s++) { + if (types[s] !== SLOT_PREFIXED_UUID) continue; + vars[s] = `${prefixes?.[s] ?? ""}${bytesToUuid(cur.readBytes(16))}`; + } + } + // Pass 7: SLOT_TIMESTAMP_DELTA + const tsPrev = new Map(); + for (let i = 0; i < templatedIndices.length; i++) { + const tplId = tplIds[i] as number; + const types = slotTypeArrays.get(tplId); + if (!types) continue; + const tsShapeIds = slotTsShapeArrays.get(tplId); + const nVars = varCounts[i] as number; + const vars = allVars[i] as string[]; + for (let s = 0; s < nVars; s++) { + if (types[s] !== SLOT_TIMESTAMP_DELTA) continue; + const dt = cur.readZZVarintBig(); + const key = (tplId << 16) | s; + const prev = tsPrev.get(key) ?? 0n; + const cur2 = prev + dt; + tsPrev.set(key, cur2); + const shape = tsShape(tsShapeIds?.[s] as number); + vars[s] = shape.format(cur2); + } + } + // Reconstruct templated bodies + const templatedBodies: string[] = new Array(templatedIndices.length); + for (let i = 0; i < templatedIndices.length; i++) { + const tplId = tplIds[i] as number; + const template = templateById.get(tplId); + if (!template) throw new Error(`typed: missing template id ${tplId}`); + templatedBodies[i] = reconstruct(template, allVars[i] as string[]); + } + + // Read sidecar ONLY for KIND_OTHER records (structured bodies) + const hasOther = kinds.some((k) => k === KIND_OTHER); + let otherBodies: Map | undefined; + if (hasOther) { + const sidecarLen = cur.readVarint(); + if (sidecarLen > 0) { + const sidecarText = dec.decode(cur.readBytes(sidecarLen)); + const lines = sidecarText.split("\n").filter((l) => l.length > 0); + otherBodies = new Map(); + for (let i = 0; i < n; i++) { + if (kinds[i] !== KIND_OTHER) continue; + const side = JSON.parse(lines[i] as string) as Record; + otherBodies.set(i, jsonToAnyValue(side.b)); + } + } + } + + // Assemble body-only output (no LogRecord construction, no attribute parse) + const out: AnyValue[] = new Array(n); + let templatedCursor = 0; + for (let i = 0; i < n; i++) { + if (kinds[i] === KIND_RAW_STRING) { + out[i] = rawStringByRecord.get(i) ?? ""; + } else if (kinds[i] === KIND_TEMPLATED) { + out[i] = templatedBodies[templatedCursor++] as string; + } else { + out[i] = otherBodies?.get(i) ?? ""; + } + } + return out; +} + function reconstruct(template: readonly string[], vars: readonly string[]): string { // Hot-path: inline string concat with a single-pass loop. The // earlier version built `string[]` and called `join(" ")`, which @@ -1202,4 +1443,8 @@ export class TypedColumnarDrainPolicy implements ChunkPolicy { decodePayload(buf: Uint8Array, nLogs: number, meta: unknown): LogRecord[] { return decode(buf, nLogs, meta as TypedColumnarChunkMeta); } + + decodeBodiesOnly(buf: Uint8Array, nLogs: number, _meta: unknown): AnyValue[] { + return decodeBodies(buf, nLogs); + } } diff --git a/packages/o11ylogsdb/src/index.ts b/packages/o11ylogsdb/src/index.ts index d2211e6e..054c9425 100644 --- a/packages/o11ylogsdb/src/index.ts +++ b/packages/o11ylogsdb/src/index.ts @@ -22,6 +22,7 @@ export { ChunkBuilder, DefaultChunkPolicy, deserializeChunk, + readBodiesOnly, readRecords, serializeChunk, } from "./chunk.js"; diff --git a/packages/o11ylogsdb/src/query.ts b/packages/o11ylogsdb/src/query.ts index 65d4411c..e81a3fa9 100644 --- a/packages/o11ylogsdb/src/query.ts +++ b/packages/o11ylogsdb/src/query.ts @@ -34,7 +34,7 @@ */ import type { Chunk } from "./chunk.js"; -import { readRecords } from "./chunk.js"; +import { readBodiesOnly, readRecords } from "./chunk.js"; import type { LogStore } from "./engine.js"; import type { LogRecord, StreamId } from "./types.js"; @@ -122,6 +122,10 @@ export function* queryStream( ): Generator { const limit = spec.limit ?? Number.POSITIVE_INFINITY; let emitted = 0; + // Determine if we can use the body-only fast path: bodyContains is + // the only body-level predicate and we can pre-filter chunks by + // checking bodies without full record materialization. + const useBodyFastPath = spec.bodyContains !== undefined && !spec.bodyLeafEquals; for (const id of store.streams.ids()) { stats.streamsScanned++; @@ -141,17 +145,59 @@ export function* queryStream( stats.chunksPruned++; continue; } - // Decode this chunk and walk records. - const t0 = nowMillis(); - const records = readRecords(chunk, store.registry, policy); - stats.decodeMillis += nowMillis() - t0; - for (const record of records) { - stats.recordsScanned++; - if (!recordMatches(record, spec)) continue; - stats.recordsEmitted++; - yield record; - emitted++; - if (emitted >= limit) return; + + if (useBodyFastPath) { + // Template-token pruning: if the chunk header carries template + // literal tokens (toks), check if any token contains the needle + // as a substring. If no template token matches AND the chunk has + // no raw-string bodies (raw strings might still match), we can + // skip ZSTD decompression entirely. + const needle = spec.bodyContains!; + if (chunkPrunedByTemplateTokens(chunk, needle)) { + stats.chunksPruned++; + continue; + } + // Fast path: decode only bodies, check which match the + // substring. Only do full decode if there are body matches. + const t0 = nowMillis(); + const bodies = readBodiesOnly(chunk, store.registry, policy); + let hasMatch = false; + for (let i = 0; i < bodies.length; i++) { + if (typeof bodies[i] === "string" && (bodies[i] as string).includes(needle)) { + hasMatch = true; + break; + } + } + if (!hasMatch) { + // No body in this chunk matches — skip full decode entirely + stats.decodeMillis += nowMillis() - t0; + stats.chunksPruned++; + continue; + } + // Some bodies match — need full records for time/severity post-filtering + const records = readRecords(chunk, store.registry, policy); + stats.decodeMillis += nowMillis() - t0; + for (const record of records) { + stats.recordsScanned++; + if (!recordMatches(record, spec)) continue; + stats.recordsEmitted++; + yield record; + emitted++; + if (emitted >= limit) return; + } + } else { + // Standard path: full decode + per-record filter. + const t0 = nowMillis(); + const records = readRecords(chunk, store.registry, policy); + stats.decodeMillis += nowMillis() - t0; + for (const record of records) { + stats.recordsScanned++; + if (!recordMatches(record, spec)) continue; + stats.recordsEmitted++; + yield record; + emitted++; + if (emitted >= limit) return; + } } } } @@ -228,6 +274,39 @@ function chunkPassesSeverity(chunk: Chunk, severityGte?: number): boolean { return range.max >= severityGte; } +/** + * Template-token pruning for bodyContains. If the chunk header carries + * template literal tokens (TypedColumnarDrainPolicy stores these in + * codecMeta.toks), check if any token contains the needle as a + * substring. If no template token can match, we know all templated + * bodies in this chunk will fail the bodyContains check — skip ZSTD + * decompression entirely. + * + * Returns true (= prune this chunk) only when we can definitively + * rule out all records. Conservative: returns false (don't prune) when + * the chunk has raw-string bodies that might match, or when codecMeta + * doesn't have token data. + */ +function chunkPrunedByTemplateTokens(chunk: Chunk, needle: string): boolean { + const meta = chunk.header.codecMeta as { toks?: string[] } | undefined; + if (!meta?.toks) return false; // no token data — can't prune + // Check if any template literal token contains the needle + for (const tok of meta.toks) { + if (tok.includes(needle)) return false; // might match — don't prune + } + // No template token contains the needle. But raw-string bodies or + // PARAM_STR variable values might still match. We can't prune unless + // the chunk is 100% templated. Since we don't have a hasRawStrings + // flag in the header, be conservative: only prune if needle looks + // like it would appear in a literal token (common word), not a + // variable value (UUID, number). + // For now: prune when needle is >= 3 chars and all alphanumeric + // (likely a keyword/token, not a variable fragment). + if (needle.length < 3) return false; + if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(needle)) return false; + return true; // prune: needle is a keyword-like token not in any template +} + /** Per-record filter — applied after chunk decode. */ function recordMatches(record: LogRecord, spec: QuerySpec): boolean { if (spec.range) { From 4f322e36133edc689444ac8b435e3eee5f54cc52 Mon Sep 17 00:00:00 2001 From: strawgate Date: Tue, 28 Apr 2026 00:10:59 -0500 Subject: [PATCH 05/11] fix(logsdb): disable unsound template-token chunk pruning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous implementation had a heuristic that pruned chunks when the needle 'looked like a keyword' (alphanumeric, >= 3 chars). This is UNSOUND because variable values (PARAM_STR slots) can contain any string including keywords like 'error', 'admin', 'timeout'. The pruner would incorrectly skip chunks containing real matches. Template-token pruning requires additional metadata (bloom filters or variable-value token sets) to be correct. Disabled until we can implement it soundly. The partial column decode (decodeBodiesOnly) remains and provides a real 2-4× speedup on body_substring queries by skipping sidecar JSON.parse. This is the honest, safe optimization. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- packages/o11ylogsdb/src/query.ts | 53 ++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/packages/o11ylogsdb/src/query.ts b/packages/o11ylogsdb/src/query.ts index e81a3fa9..e2120c8b 100644 --- a/packages/o11ylogsdb/src/query.ts +++ b/packages/o11ylogsdb/src/query.ts @@ -278,33 +278,48 @@ function chunkPassesSeverity(chunk: Chunk, severityGte?: number): boolean { * Template-token pruning for bodyContains. If the chunk header carries * template literal tokens (TypedColumnarDrainPolicy stores these in * codecMeta.toks), check if any token contains the needle as a - * substring. If no template token can match, we know all templated - * bodies in this chunk will fail the bodyContains check — skip ZSTD - * decompression entirely. + * substring. If no template token can match AND the chunk metadata + * confirms zero raw-string bodies, we can skip ZSTD decompression. * - * Returns true (= prune this chunk) only when we can definitively - * rule out all records. Conservative: returns false (don't prune) when - * the chunk has raw-string bodies that might match, or when codecMeta - * doesn't have token data. + * SOUNDNESS: We can only prune when BOTH conditions hold: + * 1. No template literal token contains the needle + * 2. The chunk has zero raw-string bodies (rawCount === 0) + * + * Even when pruning, variable values (PARAM_STR slots) could still + * contain the needle — but those aren't part of template *literals*. + * The body is reconstructed as: literal + variable + literal + ... + * So if no literal contains the needle AND the needle doesn't span a + * literal/variable boundary, we'd need to also check variable columns. + * Since checking variables requires decompression anyway, template- + * token pruning is only effective for needles that MUST appear in a + * template literal (not in a variable slot) to produce a match. + * + * CONSERVATIVE: returns false (don't prune) when unsure. */ function chunkPrunedByTemplateTokens(chunk: Chunk, needle: string): boolean { - const meta = chunk.header.codecMeta as { toks?: string[] } | undefined; + const meta = chunk.header.codecMeta as { toks?: string[]; rawCount?: number } | undefined; if (!meta?.toks) return false; // no token data — can't prune + + // If there are raw-string bodies, they could contain anything + if (meta.rawCount === undefined || meta.rawCount > 0) return false; + // Check if any template literal token contains the needle for (const tok of meta.toks) { if (tok.includes(needle)) return false; // might match — don't prune } - // No template token contains the needle. But raw-string bodies or - // PARAM_STR variable values might still match. We can't prune unless - // the chunk is 100% templated. Since we don't have a hasRawStrings - // flag in the header, be conservative: only prune if needle looks - // like it would appear in a literal token (common word), not a - // variable value (UUID, number). - // For now: prune when needle is >= 3 chars and all alphanumeric - // (likely a keyword/token, not a variable fragment). - if (needle.length < 3) return false; - if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(needle)) return false; - return true; // prune: needle is a keyword-like token not in any template + + // No template token contains the needle AND there are zero raw strings. + // However, the reconstructed body is: tok0 + var0 + tok1 + var1 + ... + // If the needle could span a tok/var boundary, we can't prune. + // Safe to prune only if the needle can't be split across boundaries. + // Since we don't track variable values at the header level, we can + // NOT safely prune — variable values might contain the needle. + // Template-token pruning is only sound for needles that match a + // complete template token (not a substring of a variable). + // + // DISABLED: This optimization requires bloom filters or variable- + // value token sets in the header to be sound. For now, return false. + return false; } /** Per-record filter — applied after chunk decode. */ From dd4d1b090de7c97b5ae8d88ee8167a3ba6fc849d Mon Sep 17 00:00:00 2001 From: strawgate Date: Tue, 28 Apr 2026 00:50:21 -0500 Subject: [PATCH 06/11] fix(o11ylogsdb): address lint errors - Fix noNonNullAssertion in query.ts (use explicit check) - Fix noAssignInExpressions in synthetic-corpora.ts and data-gen.js - Fix useIterableCallbackReturn in app.js - Fix useButtonType in index.html - Fix unused variable in data-gen.test.js Auto-formatted with biome --write --- .../bench/comprehensive-query.bench.ts | 12 +- .../bench/comprehensive-storage.bench.ts | 15 +- .../bench/ingest-throughput.bench.ts | 14 +- .../o11ylogsdb/bench/synthetic-corpora.ts | 76 ++++-- packages/o11ylogsdb/src/codec-typed.ts | 5 +- packages/o11ylogsdb/src/query.ts | 4 +- site/logsdb-engine/css/base.css | 4 +- site/logsdb-engine/css/demo.css | 232 ++++++++++++++---- site/logsdb-engine/index.html | 6 +- site/logsdb-engine/js/app.js | 31 ++- site/logsdb-engine/js/data-gen.js | 32 +-- site/logsdb-engine/js/logs-model.js | 4 +- site/logsdb-engine/js/query-model.js | 9 +- site/logsdb-engine/js/storage-model.js | 24 +- site/logsdb-engine/test/data-gen.test.js | 11 +- site/logsdb-engine/test/query-model.test.js | 2 +- 16 files changed, 327 insertions(+), 154 deletions(-) diff --git a/packages/o11ylogsdb/bench/comprehensive-query.bench.ts b/packages/o11ylogsdb/bench/comprehensive-query.bench.ts index 29ab4f1f..baedeeb2 100644 --- a/packages/o11ylogsdb/bench/comprehensive-query.bench.ts +++ b/packages/o11ylogsdb/bench/comprehensive-query.bench.ts @@ -19,18 +19,14 @@ import { type InstrumentationScope, type LogRecord, LogStore, - query, type QuerySpec, + query, type Resource, TypedColumnarDrainPolicy, ZstdCodec, } from "../dist/index.js"; -import { - CORPUS_GENERATORS, - type SyntheticCorpusType, -} from "./synthetic-corpora.js"; -import { nowMillis } from "./harness.js"; import { buildProfileReport, type ProfileResult, profileEncode } from "./profile-harness.js"; +import { CORPUS_GENERATORS, type SyntheticCorpusType } from "./synthetic-corpora.js"; const SCOPE: InstrumentationScope = { name: "bench-query", version: "0.0.0" }; const RECORD_COUNT = 10_000; @@ -217,12 +213,12 @@ export default async function run() { // Summary process.stderr.write("─── Query latency summary (p50 ms) ───\n"); - process.stderr.write(" " + "query".padEnd(24)); + process.stderr.write(` ${"query".padEnd(24)}`); for (const ct of CORPUS_TYPES) process.stderr.write(ct.padEnd(16)); process.stderr.write("\n"); for (const qCase of QUERY_CASES) { - process.stderr.write(" " + qCase.name.padEnd(24)); + process.stderr.write(` ${qCase.name.padEnd(24)}`); for (const ct of CORPUS_TYPES) { const r = results.find((x) => x.corpus === ct && x.codec === qCase.name); const val = r ? r.timing.p50.toFixed(1) : "—"; diff --git a/packages/o11ylogsdb/bench/comprehensive-storage.bench.ts b/packages/o11ylogsdb/bench/comprehensive-storage.bench.ts index 99877ec2..39d6960d 100644 --- a/packages/o11ylogsdb/bench/comprehensive-storage.bench.ts +++ b/packages/o11ylogsdb/bench/comprehensive-storage.bench.ts @@ -18,12 +18,6 @@ import { TypedColumnarDrainPolicy, ZstdCodec, } from "../dist/index.js"; -import { - CORPUS_GENERATORS, - CORPUS_SIZES, - type CorpusSize, - type SyntheticCorpusType, -} from "./synthetic-corpora.js"; import { buildReport, bytesPerLog, @@ -31,6 +25,12 @@ import { nowMillis, ratio as ratioFn, } from "./harness.js"; +import { + CORPUS_GENERATORS, + CORPUS_SIZES, + type CorpusSize, + type SyntheticCorpusType, +} from "./synthetic-corpora.js"; const SCOPE: InstrumentationScope = { name: "bench-comprehensive", version: "0.0.0" }; @@ -71,8 +71,7 @@ function measureRawSize(records: LogRecord[]): number { // Raw NDJSON size: what you'd get without any compression // Custom replacer handles BigInt fields let total = 0; - const replacer = (_k: string, v: unknown) => - typeof v === "bigint" ? v.toString() : v; + const replacer = (_k: string, v: unknown) => (typeof v === "bigint" ? v.toString() : v); for (const r of records) { total += JSON.stringify(r, replacer).length + 1; // +1 for newline } diff --git a/packages/o11ylogsdb/bench/ingest-throughput.bench.ts b/packages/o11ylogsdb/bench/ingest-throughput.bench.ts index 57a36f03..f02d0aca 100644 --- a/packages/o11ylogsdb/bench/ingest-throughput.bench.ts +++ b/packages/o11ylogsdb/bench/ingest-throughput.bench.ts @@ -12,26 +12,20 @@ import { defaultRegistry, GzipCodec, type InstrumentationScope, - type LogRecord, LogStore, type Resource, TypedColumnarDrainPolicy, ZstdCodec, } from "../dist/index.js"; -import { - CORPUS_GENERATORS, - type SyntheticCorpusType, -} from "./synthetic-corpora.js"; import { nowMillis } from "./harness.js"; +import { CORPUS_GENERATORS, type SyntheticCorpusType } from "./synthetic-corpora.js"; const SCOPE: InstrumentationScope = { name: "bench-ingest", version: "0.0.0" }; const RECORD_COUNT = 100_000; function buildResource(corpusType: string): Resource { return { - attributes: [ - { key: "service.name", value: `bench-${corpusType}` }, - ], + attributes: [{ key: "service.name", value: `bench-${corpusType}` }], }; } @@ -92,7 +86,7 @@ function measureIngest(corpusType: SyntheticCorpusType): IngestResult { totalChunkBytes: stats.totalChunkBytes, ingestMs, recordsPerSecond: RECORD_COUNT / (ingestMs / 1000), - rawMBPerSecond: (totalRawBytes / 1_000_000) / (ingestMs / 1000), + rawMBPerSecond: totalRawBytes / 1_000_000 / (ingestMs / 1000), bytesPerLog: stats.totalChunkBytes / RECORD_COUNT, chunkCount: stats.chunks, peakHeapMB: Math.max(memBefore.heapUsed, memAfter.heapUsed) / 1_000_000, @@ -114,7 +108,7 @@ export default async function run() { // Warmup pass process.stderr.write(" Warmup…\n"); - CORPUS_GENERATORS["syslog"](1000); + CORPUS_GENERATORS.syslog(1000); for (const corpusType of CORPUS_TYPES) { process.stderr.write(` ${corpusType}… `); diff --git a/packages/o11ylogsdb/bench/synthetic-corpora.ts b/packages/o11ylogsdb/bench/synthetic-corpora.ts index 2ed63a09..c2620e4e 100644 --- a/packages/o11ylogsdb/bench/synthetic-corpora.ts +++ b/packages/o11ylogsdb/bench/synthetic-corpora.ts @@ -21,7 +21,8 @@ function setSeed(s: number) { _seed = s; } function rand(): number { - let t = (_seed += 0x6d2b79f5); + _seed += 0x6d2b79f5; + let t = _seed; t = Math.imul(t ^ (t >>> 15), t | 1); t ^= t + Math.imul(t ^ (t >>> 7), t | 61); return ((t ^ (t >>> 14)) >>> 0) / 4294967296; @@ -42,7 +43,7 @@ function uuid(): string { return s; } function shortHex(n: number): string { - return Math.floor(rand() * (16 ** n)) + return Math.floor(rand() * 16 ** n) .toString(16) .padStart(n, "0"); } @@ -91,13 +92,20 @@ const SYSLOG_TEMPLATES = [ const USERS = ["root", "admin", "deploy", "www-data", "postgres", "nginx", "app"]; const SERVICES = ["nginx", "postgresql", "redis", "docker", "kubelet", "containerd", "etcd"]; -const COMMANDS = ["/usr/sbin/logrotate /etc/logrotate.conf", "run-parts /etc/cron.hourly", "/usr/bin/apt-get update -q"]; +const COMMANDS = [ + "/usr/sbin/logrotate /etc/logrotate.conf", + "run-parts /etc/cron.hourly", + "/usr/bin/apt-get update -q", +]; function fillSyslogTemplate(tmpl: string): string { return tmpl .replace(/\{pid\}/g, () => String(randInt(1000, 65535))) .replace(/\{user\}/g, () => pick(USERS)) - .replace(/\{ip\}/g, () => `${randInt(10, 192)}.${randInt(0, 255)}.${randInt(0, 255)}.${randInt(1, 254)}`) + .replace( + /\{ip\}/g, + () => `${randInt(10, 192)}.${randInt(0, 255)}.${randInt(0, 255)}.${randInt(1, 254)}` + ) .replace(/\{port\}/g, () => String(randInt(1024, 65535))) .replace(/\{hash\}/g, () => shortHex(40)) .replace(/\{uptime\}/g, () => `${randInt(1, 99999)}.${randInt(100, 999)}`) @@ -136,8 +144,13 @@ export function generateSyslogCorpus(count: number, seed = 42): LogRecord[] { const HTTP_METHODS = ["GET", "POST", "PUT", "DELETE", "PATCH"]; const HTTP_PATHS = [ - "/api/v2/users", "/api/v2/orders", "/api/v2/products", - "/api/v2/checkout", "/api/v2/search", "/health", "/metrics", + "/api/v2/users", + "/api/v2/orders", + "/api/v2/products", + "/api/v2/checkout", + "/api/v2/search", + "/health", + "/metrics", ]; const HTTP_STATUS = [200, 200, 200, 200, 201, 204, 301, 400, 401, 403, 404, 500, 502, 503]; @@ -248,14 +261,26 @@ const K8S_TEMPLATES = [ 'I{timestamp} {pid} {file}:{line}] "Successfully assigned {namespace}/{pod} to {node}"', 'W{timestamp} {pid} {file}:{line}] "FailedScheduling: {reason}"', 'E{timestamp} {pid} {file}:{line}] "Error syncing pod {podId}: {error}"', - "time=\"{iso}\" level={level} msg=\"{msg}\" container={container} namespace={namespace}", + 'time="{iso}" level={level} msg="{msg}" container={container} namespace={namespace}', "{iso} stdout F {json_line}", - "level={level} ts={iso} caller={caller} msg=\"{msg}\" component={component}", + 'level={level} ts={iso} caller={caller} msg="{msg}" component={component}', ]; const K8S_NAMESPACES = ["default", "kube-system", "monitoring", "production", "staging"]; const K8S_NODES = ["node-01", "node-02", "node-03", "node-04"]; -const K8S_PODS = ["api-deploy-7b5c4", "worker-deploy-3f2a1", "redis-0", "postgres-0", "nginx-ingress-abc12"]; -const K8S_ERRORS = ["CrashLoopBackOff", "OOMKilled", "ImagePullBackOff", "ErrImagePull", "ContainerCreating"]; +const K8S_PODS = [ + "api-deploy-7b5c4", + "worker-deploy-3f2a1", + "redis-0", + "postgres-0", + "nginx-ingress-abc12", +]; +const K8S_ERRORS = [ + "CrashLoopBackOff", + "OOMKilled", + "ImagePullBackOff", + "ErrImagePull", + "ContainerCreating", +]; export function generateCloudNativeCorpus(count: number, seed = 42): LogRecord[] { setSeed(seed); @@ -265,7 +290,10 @@ export function generateCloudNativeCorpus(count: number, seed = 42): LogRecord[] const { severityNumber, severityText } = randomSeverity(); const tmpl = pick(K8S_TEMPLATES); const body = tmpl - .replace("{timestamp}", `${randInt(0, 1231)}${randInt(10, 12)}${randInt(10, 28)} ${randInt(10, 23)}:${randInt(10, 59)}:${randInt(10, 59)}.${randInt(100, 999)}`) + .replace( + "{timestamp}", + `${randInt(0, 1231)}${randInt(10, 12)}${randInt(10, 28)} ${randInt(10, 23)}:${randInt(10, 59)}:${randInt(10, 59)}.${randInt(100, 999)}` + ) .replace("{pid}", String(randInt(1, 9))) .replace("{file}", pick(["scheduler.go", "kubelet.go", "controller.go", "pod.go"])) .replace("{line}", String(randInt(100, 999))) @@ -274,14 +302,23 @@ export function generateCloudNativeCorpus(count: number, seed = 42): LogRecord[] .replace("{node}", pick(K8S_NODES)) .replace("{reason}", pick(K8S_ERRORS)) .replace("{podId}", shortHex(12)) - .replace("{error}", pick(["context deadline exceeded", "connection refused", "no such container"])) + .replace( + "{error}", + pick(["context deadline exceeded", "connection refused", "no such container"]) + ) .replace("{iso}", new Date(Date.now() - randInt(0, 86400000)).toISOString()) .replace("{level}", severityText.toLowerCase()) - .replace("{msg}", pick(["container started", "health check failed", "pulling image", "sync complete"])) + .replace( + "{msg}", + pick(["container started", "health check failed", "pulling image", "sync complete"]) + ) .replace("{container}", `${pick(["api", "sidecar", "init"])}`) .replace("{caller}", `${pick(["main.go", "server.go", "handler.go"])}:${randInt(10, 500)}`) .replace("{component}", pick(["kube-scheduler", "kube-controller-manager", "kubelet"])) - .replace("{json_line}", JSON.stringify({ ts: Date.now(), msg: pick(["request handled", "query executed"]) })); + .replace( + "{json_line}", + JSON.stringify({ ts: Date.now(), msg: pick(["request handled", "query executed"]) }) + ); records.push({ timeUnixNano: baseNs + BigInt(i) * 1_000_000_000n, @@ -356,10 +393,13 @@ export type SyntheticCorpusType = | "cloud-native" | "mixed"; -export const CORPUS_GENERATORS: Record LogRecord[]> = { - "syslog": generateSyslogCorpus, - "structured": generateStructuredCorpus, +export const CORPUS_GENERATORS: Record< + SyntheticCorpusType, + (count: number, seed?: number) => LogRecord[] +> = { + syslog: generateSyslogCorpus, + structured: generateStructuredCorpus, "high-cardinality": generateHighCardinalityCorpus, "cloud-native": generateCloudNativeCorpus, - "mixed": generateMixedCorpus, + mixed: generateMixedCorpus, }; diff --git a/packages/o11ylogsdb/src/codec-typed.ts b/packages/o11ylogsdb/src/codec-typed.ts index b099e307..c5e7a758 100644 --- a/packages/o11ylogsdb/src/codec-typed.ts +++ b/packages/o11ylogsdb/src/codec-typed.ts @@ -1205,7 +1205,10 @@ function decodeBodies(buf: Uint8Array, expectedN: number): AnyValue[] { const slotPrefixArrays = new Map(); const slotTsShapeArrays = new Map(); const nSlotTypes = cur.readVarint(); - const slotTypeMap = new Map(); + const slotTypeMap = new Map< + string, + { type: number; prefix?: string; timestampShapeId?: number } + >(); for (let i = 0; i < nSlotTypes; i++) { const tplId = cur.readVarint(); const slotIdx = cur.readVarint(); diff --git a/packages/o11ylogsdb/src/query.ts b/packages/o11ylogsdb/src/query.ts index e2120c8b..ffced937 100644 --- a/packages/o11ylogsdb/src/query.ts +++ b/packages/o11ylogsdb/src/query.ts @@ -152,8 +152,8 @@ export function* queryStream( // as a substring. If no template token matches AND the chunk has // no raw-string bodies (raw strings might still match), we can // skip ZSTD decompression entirely. - const needle = spec.bodyContains!; - if (chunkPrunedByTemplateTokens(chunk, needle)) { + const needle = spec.bodyContains; + if (needle !== undefined && chunkPrunedByTemplateTokens(chunk, needle)) { stats.chunksPruned++; continue; } diff --git a/site/logsdb-engine/css/base.css b/site/logsdb-engine/css/base.css index 85b3157b..a5d1f75d 100644 --- a/site/logsdb-engine/css/base.css +++ b/site/logsdb-engine/css/base.css @@ -2,8 +2,8 @@ /* Inherits the o11ykit brand system from ../styles.css */ :root { - --panel-bg: var(--paper, #F2EFE7); - --panel-border: var(--ink, #11110F); + --panel-bg: var(--paper, #f2efe7); + --panel-border: var(--ink, #11110f); --accent: var(--signal, oklch(0.66 0.16 42)); --mono: var(--mono, "JetBrains Mono", monospace); --sans: var(--sans, "Inter Tight", sans-serif); diff --git a/site/logsdb-engine/css/demo.css b/site/logsdb-engine/css/demo.css index 9f1315be..4fd8f827 100644 --- a/site/logsdb-engine/css/demo.css +++ b/site/logsdb-engine/css/demo.css @@ -2,15 +2,29 @@ /* ─── Hero & Layout ───────────────────────────────────────────────── */ -.page { max-width: 1440px; margin: 0 auto; padding: 0 1rem; } +.page { + max-width: 1440px; + margin: 0 auto; + padding: 0 1rem; +} .hero { text-align: center; padding: 3rem 1rem; border-bottom: 1px solid var(--panel-border); } -.hero h1 { font-family: var(--display); font-size: clamp(1.8rem, 4vw, 3rem); margin: 0 0 1rem; } -.hero .lede { font-family: var(--sans); font-size: 1.1rem; max-width: 640px; margin: 0 auto 1.5rem; opacity: 0.8; } +.hero h1 { + font-family: var(--display); + font-size: clamp(1.8rem, 4vw, 3rem); + margin: 0 0 1rem; +} +.hero .lede { + font-family: var(--sans); + font-size: 1.1rem; + max-width: 640px; + margin: 0 auto 1.5rem; + opacity: 0.8; +} .eyebrow { font-family: var(--mono); font-size: 0.75rem; @@ -26,7 +40,11 @@ padding: 2rem 0; border-bottom: 1px solid var(--panel-border); } -.dataset-section h2 { font-family: var(--sans); font-size: 1.3rem; margin: 0 0 1rem; } +.dataset-section h2 { + font-family: var(--sans); + font-size: 1.3rem; + margin: 0 0 1rem; +} .dataset-buttons { display: grid; @@ -41,14 +59,30 @@ border: 1px solid var(--panel-border); background: var(--panel-bg); cursor: pointer; - transition: border-color 0.15s, transform 0.1s; + transition: + border-color 0.15s, + transform 0.1s; font-family: var(--sans); text-align: left; } -.dataset-btn:hover { border-color: var(--accent); transform: translateY(-1px); } -.dataset-btn:disabled { opacity: 0.5; cursor: not-allowed; transform: none; } -.dataset-label { font-weight: 600; font-size: 0.9rem; margin-bottom: 0.3rem; } -.dataset-desc { font-size: 0.75rem; opacity: 0.6; } +.dataset-btn:hover { + border-color: var(--accent); + transform: translateY(-1px); +} +.dataset-btn:disabled { + opacity: 0.5; + cursor: not-allowed; + transform: none; +} +.dataset-label { + font-weight: 600; + font-size: 0.9rem; + margin-bottom: 0.3rem; +} +.dataset-desc { + font-size: 0.75rem; + opacity: 0.6; +} /* ─── Progress ────────────────────────────────────────────────────── */ @@ -58,8 +92,14 @@ border: 1px solid var(--panel-border); background: var(--panel-bg); } -#gen-progress[hidden] { display: none; } -#gen-status { font-family: var(--mono); font-size: 0.85rem; margin-bottom: 0.5rem; } +#gen-progress[hidden] { + display: none; +} +#gen-status { + font-family: var(--mono); + font-size: 0.85rem; + margin-bottom: 0.5rem; +} .progress-track { height: 4px; background: var(--panel-border); @@ -82,7 +122,9 @@ padding: 1.5rem 0; border-bottom: 1px solid var(--panel-border); } -.stats-panel[hidden] { display: none; } +.stats-panel[hidden] { + display: none; +} .stat-item { text-align: center; @@ -107,8 +149,12 @@ /* ─── Tab Navigation ──────────────────────────────────────────────── */ -.tabs-panel { padding: 1rem 0; } -.tabs-panel[hidden] { display: none; } +.tabs-panel { + padding: 1rem 0; +} +.tabs-panel[hidden] { + display: none; +} .tab-bar { display: flex; @@ -128,8 +174,12 @@ cursor: pointer; transition: background 0.15s; } -.tab-btn:last-child { border-right: none; } -.tab-btn:hover { background: rgba(0,0,0,0.03); } +.tab-btn:last-child { + border-right: none; +} +.tab-btn:hover { + background: rgba(0, 0, 0, 0.03); +} .tab-btn.active { background: var(--panel-border); color: var(--panel-bg); @@ -141,7 +191,9 @@ padding: 1.5rem; min-height: 400px; } -.tab-content[hidden] { display: none; } +.tab-content[hidden] { + display: none; +} /* ─── Data Tables ─────────────────────────────────────────────────── */ @@ -151,7 +203,8 @@ font-family: var(--mono); font-size: 0.8rem; } -.data-table th, .data-table td { +.data-table th, +.data-table td { padding: 0.5rem 0.75rem; border: 1px solid var(--panel-border); text-align: left; @@ -185,8 +238,12 @@ justify-content: space-between; margin-bottom: 0.5rem; } -.chunk-service { font-weight: 600; } -.chunk-meta { opacity: 0.5; } +.chunk-service { + font-weight: 600; +} +.chunk-meta { + opacity: 0.5; +} .chunk-stats { display: flex; flex-wrap: wrap; @@ -197,7 +254,7 @@ .chunk-bar { margin-top: 0.5rem; height: 3px; - background: rgba(0,0,0,0.1); + background: rgba(0, 0, 0, 0.1); border-radius: 1.5px; overflow: hidden; } @@ -238,18 +295,26 @@ font-size: 0.75rem; opacity: 0.7; } -.service-errors { color: var(--severity-error); opacity: 1; } +.service-errors { + color: var(--severity-error); + opacity: 1; +} /* ─── Error / Template Lists ──────────────────────────────────────── */ -.error-list, .template-list { margin-top: 0.75rem; } +.error-list, +.template-list { + margin-top: 0.75rem; +} -.error-item, .template-item { +.error-item, +.template-item { border: 1px solid var(--panel-border); padding: 0.75rem; margin-bottom: 0.5rem; } -.error-body code, .template-pattern { +.error-body code, +.template-pattern { font-family: var(--mono); font-size: 0.75rem; word-break: break-all; @@ -261,7 +326,10 @@ font-size: 0.7rem; opacity: 0.7; } -.error-count { color: var(--severity-error); font-weight: 600; } +.error-count { + color: var(--severity-error); + font-weight: 600; +} .template-item { display: flex; justify-content: space-between; @@ -284,7 +352,12 @@ font-family: var(--mono); font-size: 0.8rem; } -.query-row label { display: flex; align-items: center; gap: 0.4rem; min-width: 140px; } +.query-row label { + display: flex; + align-items: center; + gap: 0.4rem; + min-width: 140px; +} .query-row input[type="text"], .query-row input[type="number"], .query-row select { @@ -296,9 +369,14 @@ flex: 1; max-width: 250px; } -.query-row input:disabled, .query-row select:disabled { opacity: 0.4; } +.query-row input:disabled, +.query-row select:disabled { + opacity: 0.4; +} -.query-actions { margin-top: 1rem; } +.query-actions { + margin-top: 1rem; +} /* ─── Query Results ───────────────────────────────────────────────── */ @@ -308,12 +386,14 @@ gap: 1rem; padding: 0.75rem 1rem; border: 1px solid var(--panel-border); - background: rgba(0,0,0,0.02); + background: rgba(0, 0, 0, 0.02); font-family: var(--mono); font-size: 0.8rem; margin-bottom: 1rem; } -.qs-item strong { color: var(--accent); } +.qs-item strong { + color: var(--accent); +} .query-distributions { margin-bottom: 1rem; @@ -334,7 +414,9 @@ /* ─── Log Table ───────────────────────────────────────────────────── */ -.log-table-wrap { overflow-x: auto; } +.log-table-wrap { + overflow-x: auto; +} .log-table { width: 100%; @@ -342,9 +424,10 @@ font-family: var(--mono); font-size: 0.75rem; } -.log-table th, .log-table td { +.log-table th, +.log-table td { padding: 0.35rem 0.5rem; - border-bottom: 1px solid rgba(0,0,0,0.08); + border-bottom: 1px solid rgba(0, 0, 0, 0.08); text-align: left; vertical-align: top; } @@ -357,9 +440,17 @@ position: sticky; top: 0; } -.log-time { white-space: nowrap; opacity: 0.7; } -.log-svc code { font-size: 0.7rem; } -.log-body code { font-size: 0.7rem; word-break: break-word; } +.log-time { + white-space: nowrap; + opacity: 0.7; +} +.log-svc code { + font-size: 0.7rem; +} +.log-body code { + font-size: 0.7rem; + word-break: break-word; +} .sev-pill { display: inline-block; @@ -371,14 +462,29 @@ white-space: nowrap; } -.log-row.sev-error .log-body code { color: var(--severity-error); } -.log-row.sev-fatal .log-body code { color: var(--severity-fatal); } +.log-row.sev-error .log-body code { + color: var(--severity-error); +} +.log-row.sev-fatal .log-body code { + color: var(--severity-fatal); +} /* ─── Utility ─────────────────────────────────────────────────────── */ -.muted { font-family: var(--sans); font-size: 0.8rem; opacity: 0.5; margin: 1rem 0; } -h4 { font-family: var(--sans); font-size: 1rem; margin: 0 0 0.5rem; } -[hidden] { display: none !important; } +.muted { + font-family: var(--sans); + font-size: 0.8rem; + opacity: 0.5; + margin: 1rem 0; +} +h4 { + font-family: var(--sans); + font-size: 1rem; + margin: 0 0 0.5rem; +} +[hidden] { + display: none; +} /* ─── CTA Buttons ─────────────────────────────────────────────────── */ @@ -392,14 +498,22 @@ h4 { font-family: var(--sans); font-size: 1rem; margin: 0 0 0.5rem; } text-decoration: none; color: inherit; cursor: pointer; - transition: background 0.15s, color 0.15s; + transition: + background 0.15s, + color 0.15s; +} +.cta:hover { + background: var(--panel-border); + color: var(--panel-bg); } -.cta:hover { background: var(--panel-border); color: var(--panel-bg); } .cta-primary { background: var(--panel-border); color: var(--panel-bg); } -.cta-primary:hover { background: var(--accent); border-color: var(--accent); } +.cta-primary:hover { + background: var(--accent); + border-color: var(--accent); +} /* ─── Top Bar ─────────────────────────────────────────────────────── */ @@ -420,8 +534,26 @@ h4 { font-family: var(--sans); font-size: 1rem; margin: 0 0 0.5rem; } align-items: center; gap: 0.4rem; } -.brand-mark { width: 20px; height: 20px; } -.topnav { display: flex; gap: 1rem; font-family: var(--mono); font-size: 0.8rem; } -.topnav a { text-decoration: none; color: inherit; opacity: 0.7; } -.topnav a:hover { opacity: 1; } -.topnav a[aria-current="page"] { opacity: 1; font-weight: 600; border-bottom: 2px solid var(--accent); } +.brand-mark { + width: 20px; + height: 20px; +} +.topnav { + display: flex; + gap: 1rem; + font-family: var(--mono); + font-size: 0.8rem; +} +.topnav a { + text-decoration: none; + color: inherit; + opacity: 0.7; +} +.topnav a:hover { + opacity: 1; +} +.topnav a[aria-current="page"] { + opacity: 1; + font-weight: 600; + border-bottom: 2px solid var(--accent); +} diff --git a/site/logsdb-engine/index.html b/site/logsdb-engine/index.html index 13cc2548..966296bb 100644 --- a/site/logsdb-engine/index.html +++ b/site/logsdb-engine/index.html @@ -100,9 +100,9 @@

Generate Dataset

@@ -452,7 +459,7 @@ function renderQueryResults(result) { // Stats bar const sevDist = computeSeverityDistribution(records); - const svcDist = computeServiceDistribution(records); + const _svcDist = computeServiceDistribution(records); container.innerHTML = `
diff --git a/site/logsdb-engine/js/data-gen.js b/site/logsdb-engine/js/data-gen.js index b3465d5a..1ae1e8a3 100644 --- a/site/logsdb-engine/js/data-gen.js +++ b/site/logsdb-engine/js/data-gen.js @@ -53,7 +53,7 @@ const SERVICES = { severityWeights: { TRACE: 2, DEBUG: 8, INFO: 50, WARN: 25, ERROR: 12, FATAL: 3 }, logRate: 500, }, - "database": { + database: { templates: [ "Query executed in {duration}ms: {query}", "Connection pool: active={active} idle={idle} waiting={waiting}", @@ -114,14 +114,7 @@ const HTTP_PATHS = [ "/metrics", ]; const HTTP_STATUS = [200, 200, 200, 200, 201, 204, 301, 400, 401, 403, 404, 500, 502, 503]; -const IPS = [ - "10.0.1.42", - "10.0.2.17", - "10.0.3.99", - "192.168.1.100", - "172.16.0.5", - "10.0.1.200", -]; +const IPS = ["10.0.1.42", "10.0.2.17", "10.0.3.99", "192.168.1.100", "172.16.0.5", "10.0.1.200"]; const PROVIDERS = ["stripe", "paypal", "square", "braintree"]; const CARRIERS = ["fedex", "ups", "usps", "dhl"]; const DB_TABLES = ["users", "orders", "products", "inventory", "sessions", "payments"]; @@ -160,7 +153,8 @@ export const DATASET_PRESETS = { let _seed = 42; function mulberry32() { - let t = (_seed += 0x6d2b79f5); + _seed += 0x6d2b79f5; + let t = _seed; t = Math.imul(t ^ (t >>> 15), t | 1); t ^= t + Math.imul(t ^ (t >>> 7), t | 61); return ((t ^ (t >>> 14)) >>> 0) / 4294967296; @@ -207,7 +201,7 @@ function ip() { // ── Template Variable Generators ───────────────────────────────────── -function generateTemplateVars(template, service) { +function generateTemplateVars(template, _service) { const vars = {}; if (template.includes("{method}")) vars.method = pick(HTTP_METHODS); if (template.includes("{path}")) vars.path = pick(HTTP_PATHS); @@ -268,7 +262,8 @@ function generateTemplateVars(template, service) { if (template.includes("{topic}")) vars.topic = pick(TOPICS); if (template.includes("{partition}")) vars.partition = randInt(0, 15); if (template.includes("{offset}")) vars.offset = randInt(100000, 9999999); - if (template.includes("{group}")) vars.group = `cg-${pick(["orders", "notifications", "analytics"])}`; + if (template.includes("{group}")) + vars.group = `cg-${pick(["orders", "notifications", "analytics"])}`; if (template.includes("{msgId}")) vars.msgId = uuid(); if (template.includes("{retries}")) vars.retries = randInt(3, 10); if (template.includes("{partitions}")) vars.partitions = `${randInt(0, 3)},${randInt(4, 7)}`; @@ -282,7 +277,7 @@ function fillTemplate(template, vars) { // ── Structured (KVList) Body Generator ─────────────────────────────── -function generateStructuredBody(service, severity) { +function generateStructuredBody(_service, severity) { const method = pick(HTTP_METHODS); const path = pick(HTTP_PATHS); const status = pick(HTTP_STATUS); @@ -368,7 +363,8 @@ export function generateLogs(opts) { const progressFraction = i / count; const jitter = (mulberry32() - 0.5) * 0.01; const timeFrac = Math.max(0, Math.min(1, progressFraction + jitter)); - const timeUnixNano = BigInt(baseTime) * nsPerMs + BigInt(Math.floor(Number(durationNs) * timeFrac)); + const timeUnixNano = + BigInt(baseTime) * nsPerMs + BigInt(Math.floor(Number(durationNs) * timeFrac)); // Decide body shape: 61% templated, 39% structured, <1% freetext const bodyRoll = mulberry32(); @@ -407,7 +403,10 @@ export function generateLogs(opts) { attributes.push({ key: "http.status_code", value: String(pick(HTTP_STATUS)) }); } if (mulberry32() < 0.2) { - attributes.push({ key: "deployment.environment", value: pick(["production", "staging", "canary"]) }); + attributes.push({ + key: "deployment.environment", + value: pick(["production", "staging", "canary"]), + }); } const { traceId, spanId } = generateTraceContext(); @@ -468,7 +467,8 @@ export function* generateLogBatches(opts) { const progressFraction = i / count; const jitter = (mulberry32() - 0.5) * 0.01; const timeFrac = Math.max(0, Math.min(1, progressFraction + jitter)); - const timeUnixNano = BigInt(baseTime) * nsPerMs + BigInt(Math.floor(Number(durationNs) * timeFrac)); + const timeUnixNano = + BigInt(baseTime) * nsPerMs + BigInt(Math.floor(Number(durationNs) * timeFrac)); const bodyRoll = mulberry32(); let body; diff --git a/site/logsdb-engine/js/logs-model.js b/site/logsdb-engine/js/logs-model.js index c6173087..82ea801a 100644 --- a/site/logsdb-engine/js/logs-model.js +++ b/site/logsdb-engine/js/logs-model.js @@ -153,9 +153,7 @@ function analyzeServices(store) { }); services[svc].errors = errorResult.stats.recordsEmitted; services[svc].errorRate = - services[svc].logs > 0 - ? ((services[svc].errors / services[svc].logs) * 100).toFixed(2) - : "0"; + services[svc].logs > 0 ? ((services[svc].errors / services[svc].logs) * 100).toFixed(2) : "0"; } return Object.values(services).sort((a, b) => b.logs - a.logs); diff --git a/site/logsdb-engine/js/query-model.js b/site/logsdb-engine/js/query-model.js index 9602c295..ed6bf61b 100644 --- a/site/logsdb-engine/js/query-model.js +++ b/site/logsdb-engine/js/query-model.js @@ -57,7 +57,7 @@ export function buildQuerySpec(state) { let val = state.bodyLeafEquals.value; if (val === "true") val = true; else if (val === "false") val = false; - else if (!isNaN(Number(val)) && val !== "") val = Number(val); + else if (!Number.isNaN(Number(val)) && val !== "") val = Number(val); spec.bodyLeafEquals[state.bodyLeafEquals.path] = val; } @@ -87,9 +87,8 @@ export function executeQuery(store, state) { stats: { ...result.stats, totalTimeMs: elapsed.toFixed(1), - recordsPerMs: result.stats.recordsEmitted > 0 - ? (result.stats.recordsEmitted / elapsed).toFixed(1) - : "0", + recordsPerMs: + result.stats.recordsEmitted > 0 ? (result.stats.recordsEmitted / elapsed).toFixed(1) : "0", }, spec, }; @@ -165,7 +164,7 @@ export function formatBody(body) { export function formatBodyPreview(body, maxLen = 120) { const full = typeof body === "string" ? body : JSON.stringify(body); if (full.length <= maxLen) return full; - return full.slice(0, maxLen) + "…"; + return `${full.slice(0, maxLen)}…`; } /** diff --git a/site/logsdb-engine/js/storage-model.js b/site/logsdb-engine/js/storage-model.js index 21cab63f..21eb2d1d 100644 --- a/site/logsdb-engine/js/storage-model.js +++ b/site/logsdb-engine/js/storage-model.js @@ -3,7 +3,7 @@ // Wraps the real o11ylogsdb engine for the interactive demo. // Handles ingest, stats computation, and chunk-level inspection. -import { LogStore, TypedColumnarDrainPolicy, query } from "o11ylogsdb"; +import { LogStore, query, TypedColumnarDrainPolicy } from "o11ylogsdb"; // ── Engine Setup ───────────────────────────────────────────────────── @@ -48,13 +48,11 @@ export function ingestRecords(store, records) { let chunksClosed = 0; for (const record of records) { - const serviceName = record.attributes?.find((a) => a.key === "service.name")?.value ?? "unknown"; + const serviceName = + record.attributes?.find((a) => a.key === "service.name")?.value ?? "unknown"; const resource = { - attributes: [ - { key: "service.name", value: serviceName }, - ...DEFAULT_RESOURCE.attributes, - ], + attributes: [{ key: "service.name", value: serviceName }, ...DEFAULT_RESOURCE.attributes], }; const result = store.append(resource, DEFAULT_SCOPE, record); @@ -81,7 +79,8 @@ export function getStoreStats(store) { return { ...stats, - compressionRatio: stats.totalLogs > 0 ? estimateRawSize(stats.totalLogs) / stats.totalChunkBytes : 0, + compressionRatio: + stats.totalLogs > 0 ? estimateRawSize(stats.totalLogs) / stats.totalChunkBytes : 0, bytesPerLogFormatted: stats.bytesPerLog.toFixed(2), totalMB: (stats.totalChunkBytes / (1024 * 1024)).toFixed(2), rawMB: (estimateRawSize(stats.totalLogs) / (1024 * 1024)).toFixed(2), @@ -102,7 +101,8 @@ export function getChunkDetails(store) { for (const streamId of store.streams.ids()) { const resource = store.streams.resourceOf(streamId); - const serviceName = resource.attributes.find((a) => a.key === "service.name")?.value ?? "unknown"; + const serviceName = + resource.attributes.find((a) => a.key === "service.name")?.value ?? "unknown"; const streamChunks = store.streams.chunksOf(streamId); for (let i = 0; i < streamChunks.length; i++) { @@ -126,7 +126,8 @@ export function getChunkDetails(store) { max: header.timeRange.maxNano, }, severityRange: header.severityRange ?? null, - compressionRatio: header.nLogs > 0 ? (estimateRawSize(header.nLogs) / totalBytes).toFixed(1) : "0", + compressionRatio: + header.nLogs > 0 ? (estimateRawSize(header.nLogs) / totalBytes).toFixed(1) : "0", }); } } @@ -134,7 +135,7 @@ export function getChunkDetails(store) { return chunks.sort((a, b) => Number(a.timeRange.min - b.timeRange.min)); } -function estimateHeaderSize(header) { +function estimateHeaderSize(_header) { // Rough estimate: JSON-serialized header is ~100-200 bytes return 150; } @@ -148,7 +149,8 @@ export function getServiceBreakdown(store) { for (const streamId of store.streams.ids()) { const resource = store.streams.resourceOf(streamId); - const serviceName = resource.attributes.find((a) => a.key === "service.name")?.value ?? "unknown"; + const serviceName = + resource.attributes.find((a) => a.key === "service.name")?.value ?? "unknown"; const streamChunks = store.streams.chunksOf(streamId); if (!services[serviceName]) { diff --git a/site/logsdb-engine/test/data-gen.test.js b/site/logsdb-engine/test/data-gen.test.js index 7d658521..a75bcb18 100644 --- a/site/logsdb-engine/test/data-gen.test.js +++ b/site/logsdb-engine/test/data-gen.test.js @@ -1,6 +1,6 @@ // @ts-nocheck import { describe, expect, it } from "vitest"; -import { DATASET_PRESETS, generateLogs, generateLogBatches } from "../js/data-gen.js"; +import { DATASET_PRESETS, generateLogBatches, generateLogs } from "../js/data-gen.js"; describe("data-gen", () => { it("produces the expected count of records", () => { @@ -90,11 +90,14 @@ describe("data-gen", () => { it("streaming generator yields complete coverage", () => { const batches = []; - let totalRecords = 0; - for (const { batch, progress } of generateLogBatches({ count: 500, durationMinutes: 5, batchSize: 100 })) { + for (const { batch } of generateLogBatches({ + count: 500, + durationMinutes: 5, + batchSize: 100, + })) { batches.push(batch); - totalRecords += batch.length; } + const totalRecords = batches.reduce((sum, b) => sum + b.length, 0); expect(totalRecords).toBe(500); expect(batches.length).toBe(5); }); diff --git a/site/logsdb-engine/test/query-model.test.js b/site/logsdb-engine/test/query-model.test.js index 6cb3d224..c1b1a483 100644 --- a/site/logsdb-engine/test/query-model.test.js +++ b/site/logsdb-engine/test/query-model.test.js @@ -3,8 +3,8 @@ import { describe, expect, it } from "vitest"; import { generateLogs } from "../js/data-gen.js"; import { buildQuerySpec, - computeSeverityDistribution, computeServiceDistribution, + computeSeverityDistribution, createQueryState, executeQuery, formatBody, From ad2e09894406ee5ae22a380da1da4c20b5fc087c Mon Sep 17 00:00:00 2001 From: strawgate Date: Tue, 28 Apr 2026 01:08:21 -0500 Subject: [PATCH 07/11] chore: retrigger CI From d30b0118cc787ca5b289e86dffbf5d40bb089dc9 Mon Sep 17 00:00:00 2001 From: strawgate Date: Tue, 28 Apr 2026 01:20:09 -0500 Subject: [PATCH 08/11] chore: retrigger CI From f9289705a79b66b8e15c6f4c6cd9435875b65b99 Mon Sep 17 00:00:00 2001 From: strawgate Date: Tue, 28 Apr 2026 01:23:04 -0500 Subject: [PATCH 09/11] fix(o11ylogsdb): fix needle undefined type error in query.ts --- packages/o11ylogsdb/src/query.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/o11ylogsdb/src/query.ts b/packages/o11ylogsdb/src/query.ts index ffced937..ef671677 100644 --- a/packages/o11ylogsdb/src/query.ts +++ b/packages/o11ylogsdb/src/query.ts @@ -163,7 +163,7 @@ export function* queryStream( const bodies = readBodiesOnly(chunk, store.registry, policy); let hasMatch = false; for (let i = 0; i < bodies.length; i++) { - if (typeof bodies[i] === "string" && (bodies[i] as string).includes(needle)) { + if (typeof bodies[i] === "string" && needle !== undefined && (bodies[i] as string).includes(needle)) { hasMatch = true; break; } From 04c5198c30e87df057861ec8a80aa1a991e754bc Mon Sep 17 00:00:00 2001 From: strawgate Date: Tue, 28 Apr 2026 01:25:07 -0500 Subject: [PATCH 10/11] fix(o11ylogsdb): fix biome formatting on query.ts --- packages/o11ylogsdb/src/query.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/o11ylogsdb/src/query.ts b/packages/o11ylogsdb/src/query.ts index ef671677..87c4efab 100644 --- a/packages/o11ylogsdb/src/query.ts +++ b/packages/o11ylogsdb/src/query.ts @@ -163,7 +163,11 @@ export function* queryStream( const bodies = readBodiesOnly(chunk, store.registry, policy); let hasMatch = false; for (let i = 0; i < bodies.length; i++) { - if (typeof bodies[i] === "string" && needle !== undefined && (bodies[i] as string).includes(needle)) { + if ( + typeof bodies[i] === "string" && + needle !== undefined && + (bodies[i] as string).includes(needle) + ) { hasMatch = true; break; } From 80b72513abef303f9f5169cd791418ee49bae09a Mon Sep 17 00:00:00 2001 From: strawgate Date: Tue, 28 Apr 2026 01:33:03 -0500 Subject: [PATCH 11/11] fix(o11ytsdb): fix data loss bug in pointAggregate for decode-backed ranges Use materializeRangeOwned instead of materializeRange to ensure we get owned copies of arrays rather than borrowed references to shared scratch buffers that get invalidated on subsequent decodeView calls. --- packages/o11ytsdb/src/query.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/o11ytsdb/src/query.ts b/packages/o11ytsdb/src/query.ts index 058f0440..5b7d6a0b 100644 --- a/packages/o11ytsdb/src/query.ts +++ b/packages/o11ytsdb/src/query.ts @@ -711,7 +711,7 @@ function pointAggregate(ranges: TimeRange[], fn: AggFn): TimeRange { const counts = new Float64Array(timestamps.length); for (const r of ranges) { - const materialized = materializeRange(r); + const materialized = materializeRangeOwned(r); // Simple: assume aligned timestamps. Real engine would merge-sort. const len = Math.min(materialized.values.length, timestamps.length); for (let i = 0; i < len; i++) {