From 0ea3f2b845365c88e49de1f678e02b64e63fc449 Mon Sep 17 00:00:00 2001 From: abhigyanpatwari Date: Mon, 5 Jan 2026 06:34:54 +0530 Subject: [PATCH 1/6] setting up embeddings pipeline. Facing webgpu directx issue, maybe windows compatibility issue --- .gitignore | 2 + README.md | 153 +++- package-lock.json | 974 ++++++++++++++++++++-- package.json | 1 + src/components/EmbeddingStatus.tsx | 125 +++ src/components/Header.tsx | 6 +- src/core/embeddings/embedder.ts | 224 +++++ src/core/embeddings/embedding-pipeline.ts | 358 ++++++++ src/core/embeddings/index.ts | 11 + src/core/embeddings/text-generator.ts | 235 ++++++ src/core/embeddings/types.ts | 117 +++ src/core/kuzu/csv-generator.ts | 2 + src/core/kuzu/kuzu-adapter.ts | 3 +- src/core/kuzu/schema.ts | 10 + src/hooks/useAppState.tsx | 81 ++ src/workers/ingestion.worker.ts | 125 ++- 16 files changed, 2359 insertions(+), 68 deletions(-) create mode 100644 src/components/EmbeddingStatus.tsx create mode 100644 src/core/embeddings/embedder.ts create mode 100644 src/core/embeddings/embedding-pipeline.ts create mode 100644 src/core/embeddings/index.ts create mode 100644 src/core/embeddings/text-generator.ts create mode 100644 src/core/embeddings/types.ts diff --git a/.gitignore b/.gitignore index 8d10c05a7a..301c1c21c0 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,5 @@ coverage/ *.local .vercel + + diff --git a/README.md b/README.md index b222753ab0..b48c92b829 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,9 @@ > Privacy-focused, zero-server knowledge graph generator that runs entirely in your browser. -Transform codebases into interactive knowledge graphs using AST parsing, Web Workers, and an embedded KuzuDB WASM database. All processing happens locally - your code never leaves your machine. Next step -> settng up AI Layer : An embedings pipeline using a very small embedings model that can run in browser and a Graph RAG tool using LLMs to generate and execute cyfer queries. 
Aiming to give rich and complete retrieved context enabling Agent to detect unused code, perform security audits, do a BLAST RADIUS analyses of code changes and for overall codebase understanding and explaination. +Transform codebases into interactive knowledge graphs using AST parsing, Web Workers, and an embedded KuzuDB WASM database. All processing happens locally - your code never leaves your machine. + +**Next up:** Browser-based embeddings + Graph RAG. The cool part? KuzuDB supports native vector indexing, so we can do semantic search AND graph traversal in a single Cypher query. No separate vector DB needed. See [Work in Progress](#-current-work-in-progress) for the full plan. @@ -19,11 +21,116 @@ https://github.com/user-attachments/assets/f375b00a-78cd-4f93-a96c-9ba924455f49 **Actively Building:** - [ ] **Graph RAG Agent** - AI chat with Cypher query generation for intelligent code exploration -- [ ] **Browser Embeddings** - Small embedding model (e.g., gte-small) for semantic node search + LLM-driven RAG +- [ ] **Browser Embeddings** - Small embedding model for semantic node search (see below!) - [ ] **Multi-Worker Pool** - Parallel parsing across multiple Web Workers (currently using single worker) - [ ] **Ollama Support** - Local LLM integration - [ ] **CSV Export** - Export node/relationship tables +### 🧠 Graph RAG: The Plan + +Here's what we're building for the AI layer. The goal: ask questions in plain English, get answers backed by actual graph traversal + semantic understanding. + +**The Problem:** A regular LLM doesn't know your codebase. It can't tell you what calls `handleAuth` or what breaks if you change `UserService`. You need to give it tools to explore the graph. + +**The Solution:** Combine embeddings (for "find relevant code by meaning") with graph queries (for "trace connections"). 
+ +```mermaid +flowchart TD + Q[Your Question] --> EMB[Embed with transformers.js] + EMB --> VS[Vector Search in KuzuDB] + VS --> ENTRY[Entry Point Nodes] + ENTRY --> EXPAND[Graph Traversal via Cypher] + EXPAND --> CTX[Rich Context] + CTX --> LLM[LLM Generates Answer] +``` + +**Embedding Model:** We're going with `snowflake-arctic-embed-xs` - a tiny 22M parameter model that runs entirely in the browser via [transformers.js](https://huggingface.co/docs/transformers.js). It outputs 384-dimensional vectors and scores 50.15 on MTEB (comparable to models 5x its size). The model downloads once (~90MB), gets cached, and runs locally forever. Privacy intact. ✅ + +**The Pipeline:** + +```mermaid +flowchart LR + subgraph Main["Main Pipeline (Blocking)"] + P1[Extract] --> P2[Structure] --> P3[Parse] --> P4[Imports] --> P5[Calls] + end + + P5 --> READY[Graph Ready!
User can explore] + READY --> BG + + subgraph BG["Background (Non-blocking)"] + E1[Load Model] --> E2[Embed Nodes] --> E3[Create Vector Index] + end + + E3 --> AI[AI Search Ready!] +``` + +The idea: you can start exploring the graph immediately after Phase 5. Meanwhile, embeddings are generated in the background. Once done, semantic search unlocks. + +### 💡 A Fun Discovery: Unified Vector + Graph = Superpowers + +While designing this, I stumbled onto something cool. Most Graph RAG systems use **separate databases** - a vector DB (Pinecone, Qdrant) for semantic search and a graph DB (Neo4j) for traversal. This means the LLM has to: + +1. Call vector search → get IDs +2. Take those IDs → call graph DB +3. Coordinate between two systems + +But KuzuDB WASM supports **native vector indexing** (HNSW). Which means we can do vector search AND graph traversal **in a single Cypher query**: + +```cypher +-- Find code similar to "authentication" AND trace what calls it +-- ALL IN ONE QUERY! 🤯 +CALL QUERY_VECTOR_INDEX('CodeNode', 'embedding_idx', $queryVector, 10) +WITH node AS match, distance +WHERE distance < 0.4 +MATCH (caller:CodeNode)-[r:CodeRelation {type: 'CALLS'}]->(match) +RETURN match.name AS found, + caller.name AS called_by, + distance AS relevance +ORDER BY distance +``` + +This is kind of a big deal. Here's why: + +**Traditional approach (2 queries, 2 systems):** +``` +semantic_search("auth") → ["id1", "id2", "id3"] + ↓ +graph_query("MATCH ... WHERE id IN [...]") → results +``` + +**Unified KuzuDB approach (1 query, 1 system):** +``` +cypher("CALL QUERY_VECTOR_INDEX(...) WITH node MATCH (node)-[...]->() ...") → results +``` + +And because `distance` comes back with every result, we get **built-in reranking for free**: + +```cypher +-- The LLM can dynamically control relevance thresholds! 
+CALL QUERY_VECTOR_INDEX('CodeNode', 'idx', $vec, 20) +WITH node, distance, + CASE + WHEN distance < 0.15 THEN 'exact_match' + WHEN distance < 0.30 THEN 'highly_relevant' + ELSE 'related' + END AS tier +WHERE distance < 0.5 +MATCH (node)-[*1..2]-(context) +RETURN node.name, tier, collect(context.name) AS related +ORDER BY distance +``` + +**What this enables:** +- 🎯 **Single query execution** - No round trips between systems +- 📊 **Hierarchical relevance** - LLM sees exact matches vs related vs weak +- 🌳 **Weighted expansion** - Traverse further from better matches +- ⚡ **Dynamic thresholds** - LLM adjusts `WHERE distance < X` per question type +- 🔄 **No reranker needed** - Distance IS the relevance score + +Basically, the LLM gets to write one smart query that does semantic search, filters by relevance, expands via graph relationships, and returns ranked results. No separate reranker model, no vector DB API calls, no coordination logic. Just Cypher. + +Still wrapping my head around all the query patterns this unlocks, but I'm pretty excited about it. 
+ --- ## โšก What's New in V2 @@ -363,6 +470,7 @@ RETURN f.name - โœ… Polymorphic schema (single node/edge tables) - โœ… CSV generation and bulk loading - โœ… Cypher query execution +- ๐Ÿšง Vector embeddings + HNSW index (WIP) - ๐Ÿšง Graph RAG agent (WIP) --- @@ -372,9 +480,10 @@ RETURN f.name - **Frontend**: React 18 + TypeScript + Vite + Tailwind CSS v4 - **Visualization**: Sigma.js + Graphology + ForceAtlas2 (WebGL) - **Parsing**: Tree-sitter WASM (TypeScript, JavaScript, Python) -- **Database**: KuzuDB WASM (in-browser graph database) +- **Database**: KuzuDB WASM (in-browser graph database + vector index) - **Concurrency**: Web Worker + Comlink - **Caching**: lru-cache with WASM memory management +- **AI (WIP)**: transformers.js for browser embeddings, LangChain for agent orchestration --- @@ -428,27 +537,41 @@ Open http://localhost:5173 ### Graph RAG Agent (WIP) -The idea: ask questions in plain English, get answers backed by graph queries. +The idea: ask questions in plain English, get answers backed by graph queries + semantic understanding. ```mermaid -flowchart LR +flowchart TD USER[Your Question] --> LLM[LLM] - LLM --> TOOLS[Pick a Tool] - TOOLS --> CYPHER[Run Cypher] - TOOLS --> SEARCH[Semantic Search] - CYPHER --> CONTEXT[Gather Context] - SEARCH --> CONTEXT - CONTEXT --> LLM + LLM --> |Generates| CYPHER[Unified Cypher Query] + + subgraph KUZU[KuzuDB WASM] + CYPHER --> VEC[Vector Search] + VEC --> GRAPH[Graph Traversal] + GRAPH --> RANK[Ranked Results] + end + + RANK --> CTX[Rich Context + Code Snippets] + CTX --> LLM LLM --> ANSWER[Your Answer] ``` **Example interactions:** -- "What functions call `handleAuth`?" โ†’ Generates Cypher, returns list -- "Show me the blast radius if I change `UserService`" โ†’ Traverses dependencies -- "Find all files that import from `utils/`" โ†’ Pattern matching query +- "What functions call `handleAuth`?" 
 → Vector search finds `handleAuth`, Cypher traces callers +- "Show me the blast radius if I change `UserService`" → Finds service, traverses 3 hops of dependencies +- "How does authentication work in this codebase?" → Semantic search for auth-related code, returns connected components + +**Why dynamic Cypher generation?** Originally we planned to use pre-built query templates (because LLMs can be... creative with syntax). But with the unified vector + graph approach, the LLM just needs to learn one pattern: + +```cypher +CALL QUERY_VECTOR_INDEX(...) WITH node, distance +WHERE distance < [threshold] +MATCH (node)-[relationship pattern]->(connected) +RETURN [what you need] +ORDER BY distance +``` -**Why pre-built query templates?** LLMs are... creative with Cypher syntax. Instead of letting the LLM generate queries from scratch (and fail half the time), we're building a library of reliable query templates that the LLM can choose from and fill in. +Give the LLM the schema, a few examples, and let it compose queries. The schema is simple enough that modern LLMs (GPT-4, Claude) handle it well. And if a query fails? The error message is usually clear enough for the LLM to self-correct. 
--- diff --git a/package-lock.json b/package-lock.json index 84f04a83ae..5cdccdc15f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,6 +8,7 @@ "name": "gitnexus", "version": "0.0.0", "dependencies": { + "@huggingface/transformers": "^3.0.0", "@isomorphic-git/lightning-fs": "^4.6.2", "@sigma/edge-curve": "^3.1.0", "@tailwindcss/vite": "^4.1.18", @@ -428,6 +429,16 @@ "node": ">=16" } }, + "node_modules/@emnapi/runtime": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.8.1.tgz", + "integrity": "sha512-mehfKSMWjjNol8659Z8KxEMrdSJDDot5SXMq00dM8BN4o+CLNXQ0xH2V7EchNHV4RmbZLmmPdEaXZc5H2FXmDg==", + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, "node_modules/@esbuild/aix-ppc64": { "version": "0.21.5", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz", @@ -723,87 +734,573 @@ "cpu": [ "x64" ], - "license": "MIT", + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.21.5.tgz", + "integrity": "sha512-6+gjmFpfy0BHU5Tpptkuh8+uw3mnrvgs+dSPQXQOv3ekbordwnzTVEb4qnIvQcYXq6gzkyTnoZ9dZG+D4garKg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.21.5.tgz", + "integrity": "sha512-Z0gOTd75VvXqyq7nsl93zwahcTROgqvuAcYDUr+vOv8uHhNSKROyU961kgtCD1e95IqPKSQKH7tBTslnS3tA8A==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.21.5.tgz", + 
"integrity": "sha512-SWXFF1CL2RVNMaVs+BBClwtfZSvDgtL//G/smwAc5oVK/UPu2Gu9tIaRgFmYFFKrmg3SyAjSrElf0TiJ1v8fYA==", + "cpu": [ + "ia32" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.21.5.tgz", + "integrity": "sha512-tQd/1efJuzPC6rCFwEvLtci/xNFcTZknmXs98FYDfGE4wP9ClFV98nyKrzJKVPMhdDnjzLhdUyMX4PsQAPjwIw==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@fastify/busboy": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/@fastify/busboy/-/busboy-2.1.1.tgz", + "integrity": "sha512-vBZP4NlzfOlerQTnba4aqZoMhE/a9HY7HRqoOPaETQcSQuWEIyZMHGfVu6w9wGtGK5fED5qRs2DteVCjOH60sA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14" + } + }, + "node_modules/@huggingface/jinja": { + "version": "0.5.3", + "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.5.3.tgz", + "integrity": "sha512-asqfZ4GQS0hD876Uw4qiUb7Tr/V5Q+JZuo2L+BtdrD4U40QU58nIRq3ZSgAzJgT874VLjhGVacaYfrdpXtEvtA==", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/@huggingface/transformers": { + "version": "3.8.1", + "resolved": "https://registry.npmjs.org/@huggingface/transformers/-/transformers-3.8.1.tgz", + "integrity": "sha512-tsTk4zVjImqdqjS8/AOZg2yNLd1z9S5v+7oUPpXaasDRwEDhB+xnglK1k5cad26lL5/ZIaeREgWWy0bs9y9pPA==", + "license": "Apache-2.0", + "dependencies": { + "@huggingface/jinja": "^0.5.3", + "onnxruntime-node": "1.21.0", + "onnxruntime-web": "1.22.0-dev.20250409-89f8206ba4", + "sharp": "^0.34.1" + } + }, + "node_modules/@img/colour": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.0.0.tgz", + "integrity": "sha512-A5P/LfWGFSl6nsckYtjw9da+19jB8hkJ6ACTGcDfEJ0aE+l2n2El7dsVM7UVHZQ9s2lmYMWlrS21YLy2IR1LUw==", + 
"license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/@img/sharp-darwin-arm64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.34.5.tgz", + "integrity": "sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w==", + "cpu": [ + "arm64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-darwin-arm64": "1.2.4" + } + }, + "node_modules/@img/sharp-darwin-x64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.5.tgz", + "integrity": "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw==", + "cpu": [ + "x64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-darwin-x64": "1.2.4" + } + }, + "node_modules/@img/sharp-libvips-darwin-arm64": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.2.4.tgz", + "integrity": "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g==", + "cpu": [ + "arm64" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "darwin" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-darwin-x64": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.4.tgz", + "integrity": 
"sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg==", + "cpu": [ + "x64" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "darwin" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linux-arm": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.4.tgz", + "integrity": "sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==", + "cpu": [ + "arm" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linux-arm64": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.4.tgz", + "integrity": "sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==", + "cpu": [ + "arm64" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linux-ppc64": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.4.tgz", + "integrity": "sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA==", + "cpu": [ + "ppc64" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linux-riscv64": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-riscv64/-/sharp-libvips-linux-riscv64-1.2.4.tgz", + "integrity": "sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA==", + 
"cpu": [ + "riscv64" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linux-s390x": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.4.tgz", + "integrity": "sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ==", + "cpu": [ + "s390x" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linux-x64": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.4.tgz", + "integrity": "sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==", + "cpu": [ + "x64" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linuxmusl-arm64": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.4.tgz", + "integrity": "sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==", + "cpu": [ + "arm64" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linuxmusl-x64": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.4.tgz", + "integrity": "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==", + "cpu": [ + "x64" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + 
"linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-linux-arm": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.5.tgz", + "integrity": "sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw==", + "cpu": [ + "arm" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linux-arm": "1.2.4" + } + }, + "node_modules/@img/sharp-linux-arm64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.5.tgz", + "integrity": "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg==", + "cpu": [ + "arm64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linux-arm64": "1.2.4" + } + }, + "node_modules/@img/sharp-linux-ppc64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.5.tgz", + "integrity": "sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA==", + "cpu": [ + "ppc64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linux-ppc64": "1.2.4" + } + }, + "node_modules/@img/sharp-linux-riscv64": { + "version": "0.34.5", + "resolved": 
"https://registry.npmjs.org/@img/sharp-linux-riscv64/-/sharp-linux-riscv64-0.34.5.tgz", + "integrity": "sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw==", + "cpu": [ + "riscv64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linux-riscv64": "1.2.4" + } + }, + "node_modules/@img/sharp-linux-s390x": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.5.tgz", + "integrity": "sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg==", + "cpu": [ + "s390x" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linux-s390x": "1.2.4" + } + }, + "node_modules/@img/sharp-linux-x64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.5.tgz", + "integrity": "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ==", + "cpu": [ + "x64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linux-x64": "1.2.4" + } + }, + "node_modules/@img/sharp-linuxmusl-arm64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.5.tgz", + "integrity": "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg==", + "cpu": [ + "arm64" + ], + 
"license": "Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linuxmusl-arm64": "1.2.4" + } + }, + "node_modules/@img/sharp-linuxmusl-x64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.5.tgz", + "integrity": "sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q==", + "cpu": [ + "x64" + ], + "license": "Apache-2.0", "optional": true, "os": [ - "openbsd" + "linux" ], "engines": { - "node": ">=12" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linuxmusl-x64": "1.2.4" } }, - "node_modules/@esbuild/sunos-x64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.21.5.tgz", - "integrity": "sha512-6+gjmFpfy0BHU5Tpptkuh8+uw3mnrvgs+dSPQXQOv3ekbordwnzTVEb4qnIvQcYXq6gzkyTnoZ9dZG+D4garKg==", + "node_modules/@img/sharp-wasm32": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.5.tgz", + "integrity": "sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw==", "cpu": [ - "x64" + "wasm32" ], - "license": "MIT", + "license": "Apache-2.0 AND LGPL-3.0-or-later AND MIT", "optional": true, - "os": [ - "sunos" - ], + "dependencies": { + "@emnapi/runtime": "^1.7.0" + }, "engines": { - "node": ">=12" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" } }, - "node_modules/@esbuild/win32-arm64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.21.5.tgz", - "integrity": 
"sha512-Z0gOTd75VvXqyq7nsl93zwahcTROgqvuAcYDUr+vOv8uHhNSKROyU961kgtCD1e95IqPKSQKH7tBTslnS3tA8A==", + "node_modules/@img/sharp-win32-arm64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.5.tgz", + "integrity": "sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g==", "cpu": [ "arm64" ], - "license": "MIT", + "license": "Apache-2.0 AND LGPL-3.0-or-later", "optional": true, "os": [ "win32" ], "engines": { - "node": ">=12" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" } }, - "node_modules/@esbuild/win32-ia32": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.21.5.tgz", - "integrity": "sha512-SWXFF1CL2RVNMaVs+BBClwtfZSvDgtL//G/smwAc5oVK/UPu2Gu9tIaRgFmYFFKrmg3SyAjSrElf0TiJ1v8fYA==", + "node_modules/@img/sharp-win32-ia32": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.5.tgz", + "integrity": "sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg==", "cpu": [ "ia32" ], - "license": "MIT", + "license": "Apache-2.0 AND LGPL-3.0-or-later", "optional": true, "os": [ "win32" ], "engines": { - "node": ">=12" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" } }, - "node_modules/@esbuild/win32-x64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.21.5.tgz", - "integrity": "sha512-tQd/1efJuzPC6rCFwEvLtci/xNFcTZknmXs98FYDfGE4wP9ClFV98nyKrzJKVPMhdDnjzLhdUyMX4PsQAPjwIw==", + "node_modules/@img/sharp-win32-x64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.5.tgz", + "integrity": "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw==", "cpu": 
[ "x64" ], - "license": "MIT", + "license": "Apache-2.0 AND LGPL-3.0-or-later", "optional": true, "os": [ "win32" ], "engines": { - "node": ">=12" - } - }, - "node_modules/@fastify/busboy": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/@fastify/busboy/-/busboy-2.1.1.tgz", - "integrity": "sha512-vBZP4NlzfOlerQTnba4aqZoMhE/a9HY7HRqoOPaETQcSQuWEIyZMHGfVu6w9wGtGK5fED5qRs2DteVCjOH60sA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=14" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" } }, "node_modules/@isaacs/balanced-match": { @@ -833,7 +1330,6 @@ "version": "4.0.1", "resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz", "integrity": "sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==", - "dev": true, "license": "ISC", "dependencies": { "minipass": "^7.0.4" @@ -981,6 +1477,70 @@ "node": ">= 8" } }, + "node_modules/@protobufjs/aspromise": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", + "integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/base64": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz", + "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/codegen": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz", + "integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/eventemitter": { + "version": "1.1.0", + "resolved": 
"https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz", + "integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/fetch": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz", + "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==", + "license": "BSD-3-Clause", + "dependencies": { + "@protobufjs/aspromise": "^1.1.1", + "@protobufjs/inquire": "^1.1.0" + } + }, + "node_modules/@protobufjs/float": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz", + "integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/inquire": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz", + "integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/path": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz", + "integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/pool": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz", + "integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/utf8": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz", + "integrity": 
"sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==", + "license": "BSD-3-Clause" + }, "node_modules/@rolldown/pluginutils": { "version": "1.0.0-beta.47", "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.47.tgz", @@ -1991,7 +2551,6 @@ "version": "24.10.1", "resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.1.tgz", "integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==", - "devOptional": true, "license": "MIT", "dependencies": { "undici-types": "~7.16.0" @@ -2458,6 +3017,13 @@ "file-uri-to-path": "1.0.0" } }, + "node_modules/boolean": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/boolean/-/boolean-3.2.0.tgz", + "integrity": "sha512-d0II/GO9uf9lfUHH2BQsjxzRJZBdsjgsBiW4BvhWk/3qoKwQFjIDVN19PfX8F2D/r9PCMTtLWjYVCFrpeYUzsw==", + "deprecated": "Package no longer supported. Contact Support at https://www.npmjs.com/support for more info.", + "license": "MIT" + }, "node_modules/brace-expansion": { "version": "1.1.12", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", @@ -2696,7 +3262,6 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz", "integrity": "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==", - "dev": true, "license": "BlueOak-1.0.0", "engines": { "node": ">=18" @@ -3287,6 +3852,23 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/define-properties": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.2.1.tgz", + "integrity": "sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg==", + "license": "MIT", + "dependencies": { + "define-data-property": "^1.0.1", + "has-property-descriptors": "^1.0.0", + "object-keys": "^1.1.1" + }, + "engines": { + "node": ">= 0.4" + }, + 
"funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/delaunator": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/delaunator/-/delaunator-5.0.1.tgz", @@ -3323,6 +3905,12 @@ "node": ">=8" } }, + "node_modules/detect-node": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/detect-node/-/detect-node-2.1.0.tgz", + "integrity": "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g==", + "license": "MIT" + }, "node_modules/devlop": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz", @@ -3479,6 +4067,12 @@ "node": ">= 0.4" } }, + "node_modules/es6-error": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/es6-error/-/es6-error-4.1.1.tgz", + "integrity": "sha512-Um/+FxMr9CISWh0bi5Zv0iOD+4cFh5qLeks1qhAopKVAJw3drgKbKySikp7wGhDL0HPeaja0P5ULZrxLkniUVg==", + "license": "MIT" + }, "node_modules/esbuild": { "version": "0.21.5", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.21.5.tgz", @@ -3867,6 +4461,18 @@ "node": ">=6" } }, + "node_modules/escape-string-regexp": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", + "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/esm": { "version": "3.2.25", "resolved": "https://registry.npmjs.org/esm/-/esm-3.2.25.tgz", @@ -4000,6 +4606,12 @@ "node": ">=8" } }, + "node_modules/flatbuffers": { + "version": "25.9.23", + "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-25.9.23.tgz", + "integrity": "sha512-MI1qs7Lo4Syw0EOzUl0xjs2lsoeqFku44KpngfIduHBYvzm8h2+7K8YMQh1JtVVVrUvhLpNwqVi4DERegUJhPQ==", + "license": "Apache-2.0" + }, "node_modules/follow-redirects": { "version": "1.15.11", "resolved": 
"https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", @@ -4160,6 +4772,51 @@ "node": ">= 6" } }, + "node_modules/global-agent": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/global-agent/-/global-agent-3.0.0.tgz", + "integrity": "sha512-PT6XReJ+D07JvGoxQMkT6qji/jVNfX/h364XHZOWeRzy64sSFr+xJ5OX7LI3b4MPQzdL4H8Y8M0xzPpsVMwA8Q==", + "license": "BSD-3-Clause", + "dependencies": { + "boolean": "^3.0.1", + "es6-error": "^4.1.1", + "matcher": "^3.0.0", + "roarr": "^2.15.3", + "semver": "^7.3.2", + "serialize-error": "^7.0.1" + }, + "engines": { + "node": ">=10.0" + } + }, + "node_modules/global-agent/node_modules/semver": { + "version": "7.7.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", + "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/globalthis": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/globalthis/-/globalthis-1.0.4.tgz", + "integrity": "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ==", + "license": "MIT", + "dependencies": { + "define-properties": "^1.2.1", + "gopd": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/gopd": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", @@ -4242,6 +4899,12 @@ "graphology-types": ">=0.23.0" } }, + "node_modules/guid-typescript": { + "version": "1.0.9", + "resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz", + "integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==", + "license": "ISC" + }, "node_modules/has-property-descriptors": { "version": "1.0.2", "resolved": 
"https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz", @@ -4742,6 +5405,12 @@ "dev": true, "license": "MIT" }, + "node_modules/json-stringify-safe": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz", + "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==", + "license": "ISC" + }, "node_modules/json5": { "version": "2.2.3", "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", @@ -5062,6 +5731,12 @@ "url": "https://opencollective.com/parcel" } }, + "node_modules/long": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz", + "integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==", + "license": "Apache-2.0" + }, "node_modules/longest-streak": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/longest-streak/-/longest-streak-3.1.0.tgz", @@ -5132,6 +5807,18 @@ "dev": true, "license": "ISC" }, + "node_modules/matcher": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/matcher/-/matcher-3.0.0.tgz", + "integrity": "sha512-OkeDaAZ/bQCxeFAozM55PKcKU0yJMPGifLwV4Qgjitu+5MoAfSQN4lsLJeXZ1b8w0x+/Emda6MZgXS1jvsapng==", + "license": "MIT", + "dependencies": { + "escape-string-regexp": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/math-intrinsics": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", @@ -5831,7 +6518,6 @@ "version": "7.1.2", "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", - "dev": true, "license": "ISC", "engines": { "node": ">=16 || 14 >=14.17" @@ -5841,7 +6527,6 @@ "version": "3.1.0", "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.1.0.tgz", 
"integrity": "sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw==", - "dev": true, "license": "MIT", "dependencies": { "minipass": "^7.1.2" @@ -5963,6 +6648,15 @@ "node": ">=0.10.0" } }, + "node_modules/object-keys": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz", + "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, "node_modules/observable-fns": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/observable-fns/-/observable-fns-0.6.1.tgz", @@ -5978,6 +6672,49 @@ "wrappy": "1" } }, + "node_modules/onnxruntime-common": { + "version": "1.21.0", + "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.21.0.tgz", + "integrity": "sha512-Q632iLLrtCAVOTO65dh2+mNbQir/QNTVBG3h/QdZBpns7mZ0RYbLRBgGABPbpU9351AgYy7SJf1WaeVwMrBFPQ==", + "license": "MIT" + }, + "node_modules/onnxruntime-node": { + "version": "1.21.0", + "resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.21.0.tgz", + "integrity": "sha512-NeaCX6WW2L8cRCSqy3bInlo5ojjQqu2fD3D+9W5qb5irwxhEyWKXeH2vZ8W9r6VxaMPUan+4/7NDwZMtouZxEw==", + "hasInstallScript": true, + "license": "MIT", + "os": [ + "win32", + "darwin", + "linux" + ], + "dependencies": { + "global-agent": "^3.0.0", + "onnxruntime-common": "1.21.0", + "tar": "^7.0.1" + } + }, + "node_modules/onnxruntime-web": { + "version": "1.22.0-dev.20250409-89f8206ba4", + "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.22.0-dev.20250409-89f8206ba4.tgz", + "integrity": "sha512-0uS76OPgH0hWCPrFKlL8kYVV7ckM7t/36HfbgoFw6Nd0CZVVbQC4PkrR8mBX8LtNUFZO25IQBqV2Hx2ho3FlbQ==", + "license": "MIT", + "dependencies": { + "flatbuffers": "^25.1.24", + "guid-typescript": "^1.0.9", + "long": "^5.2.3", + "onnxruntime-common": "1.22.0-dev.20250409-89f8206ba4", + "platform": "^1.3.6", + 
"protobufjs": "^7.2.4" + } + }, + "node_modules/onnxruntime-web/node_modules/onnxruntime-common": { + "version": "1.22.0-dev.20250409-89f8206ba4", + "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.22.0-dev.20250409-89f8206ba4.tgz", + "integrity": "sha512-vDJMkfCfb0b1A836rgHj+ORuZf4B4+cc2bASQtpeoJLueuFc5DuYwjIZUBrSvx/fO5IrLjLz+oTrB3pcGlhovQ==", + "license": "MIT" + }, "node_modules/p-map": { "version": "7.0.4", "resolved": "https://registry.npmjs.org/p-map/-/p-map-7.0.4.tgz", @@ -6099,6 +6836,12 @@ "node": ">=6" } }, + "node_modules/platform": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz", + "integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==", + "license": "MIT" + }, "node_modules/possible-typed-array-names": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.1.0.tgz", @@ -6186,6 +6929,30 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/protobufjs": { + "version": "7.5.4", + "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.4.tgz", + "integrity": "sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg==", + "hasInstallScript": true, + "license": "BSD-3-Clause", + "dependencies": { + "@protobufjs/aspromise": "^1.1.2", + "@protobufjs/base64": "^1.1.2", + "@protobufjs/codegen": "^2.0.4", + "@protobufjs/eventemitter": "^1.1.0", + "@protobufjs/fetch": "^1.1.0", + "@protobufjs/float": "^1.0.2", + "@protobufjs/inquire": "^1.1.0", + "@protobufjs/path": "^1.1.2", + "@protobufjs/pool": "^1.1.0", + "@protobufjs/utf8": "^1.1.0", + "@types/node": ">=13.7.0", + "long": "^5.0.0" + }, + "engines": { + "node": ">=12.0.0" + } + }, "node_modules/proxy-from-env": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", @@ -6413,6 +7180,23 @@ "node": 
">=0.10.0" } }, + "node_modules/roarr": { + "version": "2.15.4", + "resolved": "https://registry.npmjs.org/roarr/-/roarr-2.15.4.tgz", + "integrity": "sha512-CHhPh+UNHD2GTXNYhPWLnU8ONHdI+5DI+4EYIAOaiD63rHeYlZvyh8P+in5999TTSFgUYuKUAjzRI4mdh/p+2A==", + "license": "BSD-3-Clause", + "dependencies": { + "boolean": "^3.0.1", + "detect-node": "^2.0.4", + "globalthis": "^1.0.1", + "json-stringify-safe": "^5.0.1", + "semver-compare": "^1.0.0", + "sprintf-js": "^1.1.2" + }, + "engines": { + "node": ">=8.0" + } + }, "node_modules/robust-predicates": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/robust-predicates/-/robust-predicates-3.0.2.tgz", @@ -6521,6 +7305,27 @@ "semver": "bin/semver.js" } }, + "node_modules/semver-compare": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/semver-compare/-/semver-compare-1.0.0.tgz", + "integrity": "sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow==", + "license": "MIT" + }, + "node_modules/serialize-error": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/serialize-error/-/serialize-error-7.0.1.tgz", + "integrity": "sha512-8I8TjW5KMOKsZQTvoxjuSIa7foAwPWGOts+6o7sgjz41/qMD9VQHEDxi6PBvK2l0MXUmqZyNpUK+T2tQaaElvw==", + "license": "MIT", + "dependencies": { + "type-fest": "^0.13.1" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/set-function-length": { "version": "1.2.2", "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz", @@ -6584,6 +7389,62 @@ ], "license": "MIT" }, + "node_modules/sharp": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.5.tgz", + "integrity": "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==", + "hasInstallScript": true, + "license": "Apache-2.0", + "dependencies": { + "@img/colour": "^1.0.0", + "detect-libc": "^2.1.2", + 
"semver": "^7.7.3" + }, + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-darwin-arm64": "0.34.5", + "@img/sharp-darwin-x64": "0.34.5", + "@img/sharp-libvips-darwin-arm64": "1.2.4", + "@img/sharp-libvips-darwin-x64": "1.2.4", + "@img/sharp-libvips-linux-arm": "1.2.4", + "@img/sharp-libvips-linux-arm64": "1.2.4", + "@img/sharp-libvips-linux-ppc64": "1.2.4", + "@img/sharp-libvips-linux-riscv64": "1.2.4", + "@img/sharp-libvips-linux-s390x": "1.2.4", + "@img/sharp-libvips-linux-x64": "1.2.4", + "@img/sharp-libvips-linuxmusl-arm64": "1.2.4", + "@img/sharp-libvips-linuxmusl-x64": "1.2.4", + "@img/sharp-linux-arm": "0.34.5", + "@img/sharp-linux-arm64": "0.34.5", + "@img/sharp-linux-ppc64": "0.34.5", + "@img/sharp-linux-riscv64": "0.34.5", + "@img/sharp-linux-s390x": "0.34.5", + "@img/sharp-linux-x64": "0.34.5", + "@img/sharp-linuxmusl-arm64": "0.34.5", + "@img/sharp-linuxmusl-x64": "0.34.5", + "@img/sharp-wasm32": "0.34.5", + "@img/sharp-win32-arm64": "0.34.5", + "@img/sharp-win32-ia32": "0.34.5", + "@img/sharp-win32-x64": "0.34.5" + } + }, + "node_modules/sharp/node_modules/semver": { + "version": "7.7.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", + "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/sigma": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/sigma/-/sigma-3.0.2.tgz", @@ -6671,6 +7532,12 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/sprintf-js": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz", + "integrity": "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==", + "license": "BSD-3-Clause" + }, 
"node_modules/string_decoder": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", @@ -6735,7 +7602,6 @@ "version": "7.5.2", "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.2.tgz", "integrity": "sha512-7NyxrTE4Anh8km8iEy7o0QYPs+0JKBTj5ZaqHg6B39erLg0qYXN3BijtShwbsNSvQ+LN75+KV+C4QR/f6Gwnpg==", - "dev": true, "license": "BlueOak-1.0.0", "dependencies": { "@isaacs/fs-minipass": "^4.0.0", @@ -6752,7 +7618,6 @@ "version": "5.0.0", "resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz", "integrity": "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==", - "dev": true, "license": "BlueOak-1.0.0", "engines": { "node": ">=18" @@ -6998,6 +7863,18 @@ "license": "0BSD", "optional": true }, + "node_modules/type-fest": { + "version": "0.13.1", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.13.1.tgz", + "integrity": "sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg==", + "license": "(MIT OR CC0-1.0)", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/typed-array-buffer": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/typed-array-buffer/-/typed-array-buffer-1.0.3.tgz", @@ -7058,7 +7935,6 @@ "version": "7.16.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==", - "devOptional": true, "license": "MIT" }, "node_modules/unified": { diff --git a/package.json b/package.json index 270f95ff7b..79910530e1 100644 --- a/package.json +++ b/package.json @@ -9,6 +9,7 @@ "preview": "vite preview" }, "dependencies": { + "@huggingface/transformers": "^3.0.0", "@isomorphic-git/lightning-fs": "^4.6.2", "@sigma/edge-curve": "^3.1.0", "@tailwindcss/vite": "^4.1.18", diff --git 
a/src/components/EmbeddingStatus.tsx b/src/components/EmbeddingStatus.tsx new file mode 100644 index 0000000000..700d0dd019 --- /dev/null +++ b/src/components/EmbeddingStatus.tsx @@ -0,0 +1,125 @@ +import { Brain, Loader2, Check, AlertCircle, Zap } from 'lucide-react'; +import { useAppState } from '../hooks/useAppState'; + +/** + * Embedding status indicator and trigger button + * Shows in header when graph is loaded + */ +export const EmbeddingStatus = () => { + const { + embeddingStatus, + embeddingProgress, + startEmbeddings, + graph, + viewMode + } = useAppState(); + + // Only show when exploring a loaded graph + if (viewMode !== 'exploring' || !graph) return null; + + const handleStartEmbeddings = async () => { + try { + await startEmbeddings(); + } catch (error) { + console.error('Embedding failed:', error); + } + }; + + // Idle state - show button to start + if (embeddingStatus === 'idle') { + return ( + + ); + } + + // Loading model + if (embeddingStatus === 'loading') { + const downloadPercent = embeddingProgress?.modelDownloadPercent ?? 0; + return ( +
+ +
+ Loading AI model... +
+
+
+
+
+ ); + } + + // Embedding in progress + if (embeddingStatus === 'embedding') { + const processed = embeddingProgress?.nodesProcessed ?? 0; + const total = embeddingProgress?.totalNodes ?? 0; + const percent = embeddingProgress?.percent ?? 0; + + return ( +
+ +
+ + Embedding {processed}/{total} nodes + +
+
+
+
+
+ ); + } + + // Indexing + if (embeddingStatus === 'indexing') { + return ( +
+ + Creating vector index... +
+ ); + } + + // Ready + if (embeddingStatus === 'ready') { + return ( +
+ + Semantic Ready +
+ ); + } + + // Error + if (embeddingStatus === 'error') { + return ( + + ); + } + + return null; +}; + diff --git a/src/components/Header.tsx b/src/components/Header.tsx index 0857160526..89ab6267ff 100644 --- a/src/components/Header.tsx +++ b/src/components/Header.tsx @@ -2,6 +2,7 @@ import { Search, Settings, HelpCircle, Sparkles } from 'lucide-react'; import { useAppState } from '../hooks/useAppState'; import { useState, useMemo, useRef, useEffect } from 'react'; import { GraphNode } from '../core/graph/types'; +import { EmbeddingStatus } from './EmbeddingStatus'; // Color mapping for node types in search results const NODE_TYPE_COLORS: Record = { @@ -184,12 +185,15 @@ export const Header = ({ onFocusNode }: HeaderProps) => {
{/* Stats */} {graph && ( -
+
{nodeCount} nodes {edgeCount} edges
)} + {/* Embedding Status */} + + {/* Icon buttons */} +
+ {/* Test button (dev only) */} + {import.meta.env.DEV && ( + + )} + + +
); } diff --git a/src/core/embeddings/embedding-pipeline.ts b/src/core/embeddings/embedding-pipeline.ts index 6b38c2b241..6a0d8619a6 100644 --- a/src/core/embeddings/embedding-pipeline.ts +++ b/src/core/embeddings/embedding-pipeline.ts @@ -59,47 +59,32 @@ const queryEmbeddableNodes = async ( }; /** - * Update a single node's embedding in KuzuDB + * Batch INSERT embeddings into separate CodeEmbedding table + * Using a separate lightweight table avoids copy-on-write overhead + * that occurs when UPDATEing nodes with large content fields */ -const updateNodeEmbedding = async ( - executeQuery: (cypher: string) => Promise, - nodeId: string, - embedding: number[] -): Promise => { - // KuzuDB requires the array to be cast to the correct type - const embeddingStr = `[${embedding.join(',')}]`; - - const cypher = ` - MATCH (n:CodeNode {id: '${nodeId}'}) - SET n.embedding = CAST(${embeddingStr} AS FLOAT[384]) - `; - - await executeQuery(cypher); -}; - -/** - * Batch update multiple node embeddings - * More efficient than individual updates - */ -const batchUpdateEmbeddings = async ( - executeQuery: (cypher: string) => Promise, +const batchInsertEmbeddings = async ( + executeWithReusedStatement: ( + cypher: string, + paramsList: Array> + ) => Promise, updates: Array<{ id: string; embedding: number[] }> ): Promise => { - // Process updates one by one for now - // KuzuDB doesn't have great batch update syntax - for (const update of updates) { - await updateNodeEmbedding(executeQuery, update.id, update.embedding); - } + // INSERT into separate embedding table - much more memory efficient! 
+ const cypher = `CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`; + const paramsList = updates.map(u => ({ nodeId: u.id, embedding: u.embedding })); + await executeWithReusedStatement(cypher, paramsList); }; /** * Create the vector index for semantic search + * Now indexes the separate CodeEmbedding table */ const createVectorIndex = async ( executeQuery: (cypher: string) => Promise ): Promise => { const cypher = ` - CALL CREATE_VECTOR_INDEX('CodeNode', 'code_embedding_idx', 'embedding', metric := 'cosine') + CALL CREATE_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx', 'embedding', metric := 'cosine') `; try { @@ -116,11 +101,13 @@ const createVectorIndex = async ( * Run the embedding pipeline * * @param executeQuery - Function to execute Cypher queries against KuzuDB + * @param executeWithReusedStatement - Function to execute with reused prepared statement * @param onProgress - Callback for progress updates * @param config - Optional configuration override */ export const runEmbeddingPipeline = async ( executeQuery: (cypher: string) => Promise, + executeWithReusedStatement: (cypher: string, paramsList: Array>) => Promise, onProgress: EmbeddingProgressCallback, config: Partial = {} ): Promise => { @@ -203,7 +190,7 @@ export const runEmbeddingPipeline = async ( embedding: embeddingToArray(embeddings[i]), })); - await batchUpdateEmbeddings(executeQuery, updates); + await batchInsertEmbeddings(executeWithReusedStatement, updates); processedNodes += batch.length; @@ -264,6 +251,8 @@ export const runEmbeddingPipeline = async ( /** * Perform semantic search using the vector index * + * Uses separate CodeEmbedding table and JOINs with CodeNode for metadata + * * @param executeQuery - Function to execute Cypher queries * @param query - Search query text * @param k - Number of results to return (default: 10) @@ -285,15 +274,16 @@ export const semanticSearch = async ( const queryVec = embeddingToArray(queryEmbedding); const queryVecStr = 
`[${queryVec.join(',')}]`; - // Query the vector index + // Query the vector index on CodeEmbedding, then JOIN with CodeNode for metadata const cypher = ` - CALL QUERY_VECTOR_INDEX('CodeNode', 'code_embedding_idx', + CALL QUERY_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx', CAST(${queryVecStr} AS FLOAT[384]), ${k}) - YIELD node, distance + YIELD node AS emb, distance WHERE distance < ${maxDistance} - RETURN node.id AS nodeId, node.name AS name, node.label AS label, - node.filePath AS filePath, distance, - node.startLine AS startLine, node.endLine AS endLine + MATCH (n:CodeNode {id: emb.nodeId}) + RETURN n.id AS nodeId, n.name AS name, n.label AS label, + n.filePath AS filePath, distance, + n.startLine AS startLine, n.endLine AS endLine ORDER BY distance `; @@ -314,6 +304,8 @@ export const semanticSearch = async ( * Semantic search with graph expansion * Finds similar nodes AND their connections * + * Uses separate CodeEmbedding table and JOINs with CodeNode + * * @param executeQuery - Function to execute Cypher queries * @param query - Search query text * @param k - Number of initial results @@ -335,12 +327,13 @@ export const semanticSearchWithContext = async ( const queryVec = embeddingToArray(queryEmbedding); const queryVecStr = `[${queryVec.join(',')}]`; - // Query with graph expansion + // Query embedding table, JOIN with CodeNode, then expand graph const cypher = ` - CALL QUERY_VECTOR_INDEX('CodeNode', 'code_embedding_idx', + CALL QUERY_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx', CAST(${queryVecStr} AS FLOAT[384]), ${k}) - YIELD node AS match, distance + YIELD node AS emb, distance WHERE distance < 0.5 + MATCH (match:CodeNode {id: emb.nodeId}) MATCH (match)-[r:CodeRelation*1..${hops}]-(connected:CodeNode) RETURN match.id AS matchId, match.name AS matchName, match.label AS matchLabel, match.filePath AS matchPath, distance, diff --git a/src/core/kuzu/kuzu-adapter.ts b/src/core/kuzu/kuzu-adapter.ts index 59710e0344..eea3710d54 100644 --- 
a/src/core/kuzu/kuzu-adapter.ts +++ b/src/core/kuzu/kuzu-adapter.ts @@ -8,7 +8,7 @@ */ import { KnowledgeGraph } from '../graph/types'; -import { NODE_SCHEMA, EDGE_SCHEMA, NODE_TABLE_NAME, EDGE_TABLE_NAME } from './schema'; +import { NODE_SCHEMA, EDGE_SCHEMA, EMBEDDING_SCHEMA, NODE_TABLE_NAME, EDGE_TABLE_NAME } from './schema'; import { generateNodeCSV, generateEdgeCSV } from './csv-generator'; // Holds the reference to the dynamically loaded module @@ -34,8 +34,11 @@ export const initKuzu = async () => { // 3. Initialize WASM await kuzu.init(); - // 4. Create Database - db = new kuzu.Database(':memory:'); + // 4. Create Database with 512MB buffer pool + // Larger buffer needed for embedding storage (6K+ nodes ร— 384 floats) + // Constructor: Database(path, bufferPoolSize, maxNumThreads, enableCompression, readOnly) + const BUFFER_POOL_SIZE = 512 * 1024 * 1024; // 512MB + db = new kuzu.Database(':memory:', BUFFER_POOL_SIZE); conn = new kuzu.Connection(db); if (import.meta.env.DEV) console.log('โœ… KuzuDB WASM Initialized'); @@ -44,6 +47,7 @@ export const initKuzu = async () => { try { await conn.query(NODE_SCHEMA); await conn.query(EDGE_SCHEMA); + await conn.query(EMBEDDING_SCHEMA); if (import.meta.env.DEV) console.log('โœ… KuzuDB Schema Created'); } catch { // Schema might already exist, skip @@ -190,3 +194,160 @@ export const closeKuzu = async (): Promise => { } kuzu = null; }; + +/** + * Execute a prepared statement with parameters + * @param cypher - Cypher query with $param placeholders + * @param params - Object mapping param names to values + * @returns Query results + */ +export const executePrepared = async ( + cypher: string, + params: Record +): Promise => { + if (!conn) { + await initKuzu(); + } + + try { + // Note: conn.prepare is async in kuzu-wasm + const stmt = await conn.prepare(cypher); + if (!stmt.isSuccess()) { + const errMsg = await stmt.getErrorMessage(); + throw new Error(`Prepare failed: ${errMsg}`); + } + + const result = await 
conn.execute(stmt, params); + + // Collect all rows + const rows: any[] = []; + while (await result.hasNext()) { + const row = await result.getNext(); + rows.push(row); + } + + await stmt.close(); + return rows; + } catch (error) { + if (import.meta.env.DEV) console.error('Prepared query failed:', error); + throw error; + } +}; + +/** + * Execute a prepared statement with multiple parameter sets in small sub-batches + * Recreates statement every SUB_BATCH_SIZE executions to allow memory cleanup + * @param cypher - Cypher query with $param placeholders + * @param paramsList - Array of parameter objects to execute + */ +export const executeWithReusedStatement = async ( + cypher: string, + paramsList: Array> +): Promise => { + if (!conn) { + await initKuzu(); + } + + if (paramsList.length === 0) return; + + // Small sub-batch to allow memory cleanup between statement recreations + const SUB_BATCH_SIZE = 4; + + for (let i = 0; i < paramsList.length; i += SUB_BATCH_SIZE) { + const subBatch = paramsList.slice(i, i + SUB_BATCH_SIZE); + + // Create fresh statement for each sub-batch + const stmt = await conn.prepare(cypher); + if (!stmt.isSuccess()) { + const errMsg = await stmt.getErrorMessage(); + throw new Error(`Prepare failed: ${errMsg}`); + } + + try { + for (const params of subBatch) { + await conn.execute(stmt, params); + } + } finally { + await stmt.close(); + } + + // Small delay to allow garbage collection between sub-batches + if (i + SUB_BATCH_SIZE < paramsList.length) { + await new Promise(r => setTimeout(r, 0)); + } + } +}; + +/** + * Test if array parameters work with prepared statements + * This is a diagnostic function to check KuzuDB WASM capabilities + */ +export const testArrayParams = async (): Promise<{ success: boolean; error?: string }> => { + if (!conn) { + await initKuzu(); + } + + try { + // Test with a simple array parameter + const testEmbedding = new Array(384).fill(0).map((_, i) => i / 384); + + // First, get any node ID to test with + const 
nodeResult = await conn.query(`MATCH (n:${NODE_TABLE_NAME}) RETURN n.id AS id LIMIT 1`); + const nodeRow = await nodeResult.getNext(); + + if (!nodeRow) { + return { success: false, error: 'No nodes found to test with' }; + } + + const testNodeId = nodeRow.id ?? nodeRow[0]; + + if (import.meta.env.DEV) { + console.log('๐Ÿงช Testing array params with node:', testNodeId); + console.log('๐Ÿงช Embedding sample (first 5):', testEmbedding.slice(0, 5)); + } + + // Try using prepared statement with array param + // Note: conn.prepare is async in kuzu-wasm + const cypher = `MATCH (n:${NODE_TABLE_NAME} {id: $nodeId}) SET n.embedding = $embedding`; + const stmt = await conn.prepare(cypher); + + // In async API, isSuccess() returns boolean directly + if (!stmt.isSuccess()) { + const errMsg = await stmt.getErrorMessage(); + return { success: false, error: `Prepare failed: ${errMsg}` }; + } + + // Execute with array parameter + await conn.execute(stmt, { + nodeId: testNodeId, + embedding: testEmbedding, + }); + + await stmt.close(); + + // Verify it was stored + const verifyResult = await conn.query( + `MATCH (n:${NODE_TABLE_NAME} {id: '${testNodeId}'}) RETURN n.embedding AS emb` + ); + const verifyRow = await verifyResult.getNext(); + const storedEmb = verifyRow?.emb ?? verifyRow?.[0]; + + if (storedEmb && Array.isArray(storedEmb) && storedEmb.length === 384) { + if (import.meta.env.DEV) { + console.log('โœ… Array params WORK! Stored embedding length:', storedEmb.length); + } + return { success: true }; + } else { + return { + success: false, + error: `Embedding not stored correctly. Got: ${typeof storedEmb}, length: ${storedEmb?.length}` + }; + } + } catch (error) { + const errorMsg = error instanceof Error ? 
error.message : String(error); + if (import.meta.env.DEV) { + console.error('โŒ Array params test failed:', errorMsg); + } + return { success: false, error: errorMsg }; + } +}; diff --git a/src/core/kuzu/schema.ts b/src/core/kuzu/schema.ts index adea1d2e19..b849136add 100644 --- a/src/core/kuzu/schema.ts +++ b/src/core/kuzu/schema.ts @@ -10,11 +10,12 @@ export const NODE_TABLE_NAME = 'CodeNode'; export const EDGE_TABLE_NAME = 'CodeRelation'; +export const EMBEDDING_TABLE_NAME = 'CodeEmbedding'; /** * Node table schema * Stores all code elements: Files, Functions, Classes, etc. - * embedding column stores 384-dimensional vectors for semantic search + * Note: Embeddings stored separately to avoid copy-on-write overhead */ export const NODE_SCHEMA = ` CREATE NODE TABLE ${NODE_TABLE_NAME} ( @@ -25,16 +26,27 @@ CREATE NODE TABLE ${NODE_TABLE_NAME} ( startLine INT64, endLine INT64, content STRING, - embedding FLOAT[384], PRIMARY KEY (id) )`; +/** + * Separate embedding table - lightweight structure for vector storage + * This avoids copy-on-write issues when storing embeddings + * (UPDATEing nodes with large content fields would copy entire node) + */ +export const EMBEDDING_SCHEMA = ` +CREATE NODE TABLE ${EMBEDDING_TABLE_NAME} ( + nodeId STRING, + embedding FLOAT[384], + PRIMARY KEY (nodeId) +)`; + /** * Create vector index for semantic search * Uses HNSW (Hierarchical Navigable Small World) algorithm with cosine similarity */ export const CREATE_VECTOR_INDEX_QUERY = ` -CALL CREATE_VECTOR_INDEX('${NODE_TABLE_NAME}', 'code_embedding_idx', 'embedding', metric := 'cosine') +CALL CREATE_VECTOR_INDEX('${EMBEDDING_TABLE_NAME}', 'code_embedding_idx', 'embedding', metric := 'cosine') `; /** @@ -50,5 +62,5 @@ CREATE REL TABLE ${EDGE_TABLE_NAME} ( /** * All schema creation queries in order */ -export const SCHEMA_QUERIES = [NODE_SCHEMA, EDGE_SCHEMA]; +export const SCHEMA_QUERIES = [NODE_SCHEMA, EDGE_SCHEMA, EMBEDDING_SCHEMA]; diff --git a/src/hooks/useAppState.tsx 
b/src/hooks/useAppState.tsx index 0bd761d72f..59570932ad 100644 --- a/src/hooks/useAppState.tsx +++ b/src/hooks/useAppState.tsx @@ -79,6 +79,9 @@ interface AppState { semanticSearch: (query: string, k?: number) => Promise; semanticSearchWithContext: (query: string, k?: number, hops?: number) => Promise; isEmbeddingReady: boolean; + + // Debug/test methods + testArrayParams: () => Promise<{ success: boolean; error?: string }>; } const AppStateContext = createContext(null); @@ -251,6 +254,12 @@ export const AppStateProvider = ({ children }: { children: ReactNode }) => { return api.semanticSearchWithContext(query, k, hops); }, []); + const testArrayParams = useCallback(async (): Promise<{ success: boolean; error?: string }> => { + const api = apiRef.current; + if (!api) return { success: false, error: 'Worker not initialized' }; + return api.testArrayParams(); + }, []); + const toggleLabelVisibility = useCallback((label: NodeLabel) => { setVisibleLabels(prev => { if (prev.includes(label)) { @@ -300,6 +309,8 @@ export const AppStateProvider = ({ children }: { children: ReactNode }) => { semanticSearch, semanticSearchWithContext, isEmbeddingReady: embeddingStatus === 'ready', + // Debug + testArrayParams, }; return ( diff --git a/src/workers/ingestion.worker.ts b/src/workers/ingestion.worker.ts index 4087b655cc..c281fbaff0 100644 --- a/src/workers/ingestion.worker.ts +++ b/src/workers/ingestion.worker.ts @@ -187,7 +187,7 @@ const workerApi = { onProgress(progress); }; - await runEmbeddingPipeline(kuzu.executeQuery, progressCallback); + await runEmbeddingPipeline(kuzu.executeQuery, kuzu.executeWithReusedStatement, progressCallback); }, /** @@ -266,6 +266,18 @@ const workerApi = { isEmbeddingComplete = false; embeddingProgress = null; }, + + /** + * Test if KuzuDB supports array parameters in prepared statements + * This is a diagnostic function + */ + async testArrayParams(): Promise<{ success: boolean; error?: string }> { + const kuzu = await getKuzuAdapter(); + if 
(!kuzu.isKuzuReady()) { + return { success: false, error: 'Database not ready' }; + } + return kuzu.testArrayParams(); + }, }; // Expose the worker API to the main thread From a85f72a282e51021867285ac90d4f20464349b34 Mon Sep 17 00:00:00 2001 From: abhigyanpatwari Date: Mon, 5 Jan 2026 20:21:09 +0530 Subject: [PATCH 3/6] readme uppdate --- .gitignore | 1 + README.md | 58 +++++++++++++------------- src/components/DropZone.tsx | 2 +- src/core/ingestion/ast-cache.ts | 2 +- src/core/ingestion/call-processor.ts | 4 +- src/core/ingestion/import-processor.ts | 4 +- src/core/ingestion/symbol-table.ts | 2 +- src/core/kuzu/csv-generator.ts | 2 +- src/core/kuzu/kuzu-adapter.ts | 2 +- src/core/tree-sitter/parser-loader.ts | 2 +- src/hooks/useSigma.ts | 2 +- src/lib/graph-adapter.ts | 6 +-- src/services/git-clone.ts | 8 ++-- src/workers/ingestion.worker.ts | 2 +- 14 files changed, 49 insertions(+), 48 deletions(-) diff --git a/.gitignore b/.gitignore index 301c1c21c0..bda69c34f5 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,4 @@ coverage/ .vercel + diff --git a/README.md b/README.md index 05bfda25c1..49622042e0 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Transform codebases into interactive knowledge graphs using AST parsing, Web Workers, and an embedded KuzuDB WASM database. All processing happens locally - your code never leaves your machine. -**Next up:** Browser-based embeddings + Graph RAG. The cool part? KuzuDB supports native vector indexing, so we can do semantic search AND graph traversal in a single Cypher query. No separate vector DB needed. See [Work in Progress](#-current-work-in-progress) for the full plan. +**Next up:** Browser-based embeddings + Graph RAG. The cool part? KuzuDB supports native vector indexing, so I can do semantic search AND graph traversal in a single Cypher query. No separate vector DB needed. See [Work in Progress](#-current-work-in-progress) for the full plan. 
@@ -28,9 +28,9 @@ https://github.com/user-attachments/assets/f375b00a-78cd-4f93-a96c-9ba924455f49 ### ๐Ÿง  Graph RAG: The Plan -Here's what we're building for the AI layer. The goal: ask questions in plain English, get answers backed by actual graph traversal + semantic understanding. +Here's what I'm building for the AI layer. The goal: ask questions in plain English, get answers backed by actual graph traversal + semantic understanding. -**The Problem:** A regular LLM doesn't know your codebase. It can't tell you what calls `handleAuth` or what breaks if you change `UserService`. You need to give it tools to explore the graph. +**The Problem:** A regular LLM doesn't know your codebase. It can't tell you what calls `handleAuth` or what breaks if you change `UserService`. I need to give it tools to explore the graph. **The Solution:** Combine embeddings (for "find relevant code by meaning") with graph queries (for "trace connections"). @@ -44,7 +44,7 @@ flowchart TD CTX --> LLM[LLM Generates Answer] ``` -**Embedding Model:** We're going with `snowflake-arctic-embed-xs` - a tiny 22M parameter model that runs entirely in the browser via [transformers.js](https://huggingface.co/docs/transformers.js). It outputs 384-dimensional vectors and scores 50.15 on MTEB (comparable to models 5x its size). The model downloads once (~90MB), gets cached, and runs locally forever. Privacy intact. โœ… +**Embedding Model:** I'm going with `snowflake-arctic-embed-xs` - a tiny 22M parameter model that runs entirely in the browser via [transformers.js](https://huggingface.co/docs/transformers.js). It outputs 384-dimensional vectors and scores 50.15 on MTEB (comparable to models 5x its size). The model downloads once (~90MB), gets cached, and runs locally forever. Privacy intact. โœ… **The Pipeline:** @@ -74,7 +74,7 @@ While designing this, I stumbled onto something cool. Most Graph RAG systems use 2. Take those IDs โ†’ call graph DB 3. 
Coordinate between two systems -But KuzuDB WASM supports **native vector indexing** (HNSW). Which means we can do vector search AND graph traversal **in a single Cypher query**: +But KuzuDB WASM supports **native vector indexing** (HNSW). Which means it's possible to do vector search AND graph traversal **in a single Cypher query**: ```cypher -- Find code similar to "authentication" AND trace what calls it @@ -103,7 +103,7 @@ graph_query("MATCH ... WHERE id IN [...]") → results cypher("CALL QUERY_VECTOR_INDEX(...) WITH node MATCH (node)-[...]->() ...") → results ``` -And because `distance` comes back with every result, we get **built-in reranking for free**: +And because `distance` comes back with every result, this provides **built-in reranking for free**: ```cypher -- The LLM can dynamically control relevance thresholds! @@ -141,13 +141,13 @@ V2 is a major refactor focused on **performance** and **scalability**. Here's wh V1 used D3.js force simulation which worked great for small graphs, but started choking around 2-3k nodes. The browser would freeze, fans would spin, and you'd be staring at a loading spinner. -**V2 uses Sigma.js with WebGL rendering.** This means the GPU does the heavy lifting instead of JavaScript. We've tested graphs with 10k+ nodes and they render smoothly. Pan, zoom, click - all buttery smooth. +**V2 uses Sigma.js with WebGL rendering.** This means the GPU does the heavy lifting instead of JavaScript. I've tested graphs with 10k+ nodes and they render smoothly. Pan, zoom, click - all buttery smooth. The layout algorithm also moved to **ForceAtlas2 running in a Web Worker**, so your UI stays responsive while the graph positions itself. ### 🗂️ Dual HashMap Symbol Table (Goodbye Trie, Hello Speed) -In V1, we used a **Trie** (prefix tree) to store function/class definitions. It was clever - you could do fuzzy lookups and autocomplete. But it was also slow and memory-hungry for large codebases. 
+In V1, I used a **Trie** (prefix tree) to store function/class definitions. It was clever - you could do fuzzy lookups and autocomplete. But it was also slow and memory-hungry for large codebases. V2 uses a simpler but faster **Dual HashMap** approach: @@ -156,17 +156,17 @@ File-Scoped Index: Map> Global Index: Map ``` -**Why two maps?** When resolving a function call like `handleAuth()`, we first check if it's defined in a file we imported (high confidence). If not, we check the current file. As a last resort, we search globally (useful for framework magic like FastAPI's `@app.get` decorators where the connection isn't explicit in imports). +**Why two maps?** When resolving a function call like `handleAuth()`, the system first checks if it's defined in a file that was imported (high confidence). If not, it checks the current file. As a last resort, it searches globally (useful for framework magic like FastAPI's `@app.get` decorators where the connection isn't explicit in imports). -This change alone gave us **~2x speedup** on the parsing phase. +This change alone provided a **~2x speedup** on the parsing phase. ### ๐Ÿ’พ LRU Cache for AST Trees (Memory That Cleans Itself) -Tree-sitter generates AST (Abstract Syntax Tree) objects that live in WASM memory. In V1, we'd keep all of them around, which meant memory usage grew linearly with file count. Parse 5000 files? That's 5000 AST objects eating RAM. +Tree-sitter generates AST (Abstract Syntax Tree) objects that live in WASM memory. In V1, I kept all of them around, which meant memory usage grew linearly with file count. Parse 5000 files? That's 5000 AST objects eating RAM. -V2 uses an **LRU (Least Recently Used) cache** with a cap of 50 entries. When we need to parse file #51, the oldest unused AST gets evicted and we call `tree.delete()` to free the WASM memory. +V2 uses an **LRU (Least Recently Used) cache** with a cap of 50 entries. 
When the system needs to parse file #51, the oldest unused AST gets evicted and `tree.delete()` is called to free the WASM memory. -The clever part: we parse files in Phase 3, then reuse those ASTs in Phase 4 (imports) and Phase 5 (calls). The LRU cache keeps recently-parsed files hot, so we rarely need to re-parse. +The clever part: files are parsed in Phase 3, then those ASTs are reused in Phase 4 (imports) and Phase 5 (calls). The LRU cache keeps recently-parsed files hot, so re-parsing is rarely needed. ### 📊 Overall Results @@ -318,21 +318,21 @@ flowchart TD ### What Each Phase Does -**Phase 1: Extract** - We use JSZip to decompress your ZIP file and store all file contents in a Map. Simple but necessary. +**Phase 1: Extract** - JSZip is used to decompress your ZIP file and store all file contents in a Map. Simple but necessary. -**Phase 2: Structure** - We walk through all file paths and build a tree of folders and files. A path like `src/components/Button.tsx` creates nodes for `src`, `components`, and `Button.tsx` with `CONTAINS` relationships connecting them. +**Phase 2: Structure** - The system walks through all file paths and builds a tree of folders and files. A path like `src/components/Button.tsx` creates nodes for `src`, `components`, and `Button.tsx` with `CONTAINS` relationships connecting them. -**Phase 3: Parsing** - This is where the magic happens. Tree-sitter parses each file into an AST, and we extract all the interesting bits: functions, classes, interfaces, methods. These get stored in our Symbol Table for later lookup. +**Phase 3: Parsing** - This is where the magic happens. Tree-sitter parses each file into an AST, and extracts all the interesting bits: functions, classes, interfaces, methods. These get stored in the Symbol Table for later lookup. -**Phase 4: Imports** - We find all `import` and `require` statements and figure out which files they point to. 
`import { foo } from './utils'` might resolve to `./utils.ts`, `./utils/index.ts`, etc. We try common extensions until we find a match. +**Phase 4: Imports** - The pipeline finds all `import` and `require` statements and determines which files they point to. `import { foo } from './utils'` might resolve to `./utils.ts`, `./utils/index.ts`, etc. Common extensions are tried until a match is found. -**Phase 5: Calls** - The trickiest phase. We find all function calls and try to figure out what they're calling. We use our resolution strategy (import map → local → global) to link calls to their definitions. +**Phase 5: Calls** - The trickiest phase. The pipeline finds all function calls and determines what they're calling. It uses a resolution strategy (import map → local → global) to link calls to their definitions. --- ## Symbol Resolution: How We Link Function Calls -When we see code like this: +When the system encounters code like this: ```typescript import { validateUser } from './auth'; @@ -342,7 +342,7 @@ function login() { } ``` -We need to figure out that `validateUser()` refers to the function defined in `./auth.ts`. Here's our strategy: +The system needs to figure out that `validateUser()` refers to the function defined in `./auth.ts`. Here's the strategy: ```mermaid flowchart TD @@ -373,13 +373,13 @@ def get_users(): return db.query(User) # Where does 'db' come from? ``` -The `db` object might be injected by the framework, not explicitly imported. Our global search catches these cases (with lower confidence). +The `db` object might be injected by the framework, not explicitly imported. The global search catches these cases (with lower confidence). --- ## LRU AST Cache -Parsing files into ASTs is expensive, and AST objects live in WASM memory (which doesn't get garbage collected like regular JS objects). 
We use an LRU cache to keep memory bounded: +Parsing files into ASTs is expensive, and AST objects live in WASM memory (which doesn't get garbage collected like regular JS objects). An LRU cache is used to keep memory bounded: ```mermaid flowchart LR @@ -441,7 +441,7 @@ flowchart LR ## KuzuDB Integration -We load the graph into KuzuDB (an embedded graph database) so you can run Cypher queries: +The graph is loaded into KuzuDB (an embedded graph database) so you can run Cypher queries: ```mermaid flowchart TD @@ -561,7 +561,7 @@ flowchart TD - "Show me the blast radius if I change `UserService`" โ†’ Finds service, traverses 3 hops of dependencies - "How does authentication work in this codebase?" โ†’ Semantic search for auth-related code, returns connected components -**Why dynamic Cypher generation?** Originally we planned to use pre-built query templates (because LLMs can be... creative with syntax). But with the unified vector + graph approach, the LLM just needs to learn one pattern: +**Why dynamic Cypher generation?** Originally I planned to use pre-built query templates (because LLMs can be... creative with syntax). But with the unified vector + graph approach, the LLM just needs to learn one pattern: ```cypher CALL QUERY_VECTOR_INDEX(...) WITH node, distance @@ -577,11 +577,11 @@ Give the LLM the schema, a few examples, and let it compose queries. The schema ## ๐Ÿ”ฌ Deep Dive: Copy-on-Write Woes with In-Memory WASM Databases -While building the embedding pipeline, we hit an interesting memory problem. Documenting it here because it's a non-obvious gotcha for anyone doing vector storage in browser-side databases. +While building the embedding pipeline, I hit an interesting memory problem. Documenting it here because it's a non-obvious gotcha for anyone doing vector storage in browser-side databases. ### The Setup -We wanted to store 384-dimensional embeddings alongside our code nodes. 
Natural instinct: add an `embedding FLOAT[384]` column to the existing `CodeNode` table, bulk load the graph, then `UPDATE` each node with its embedding. +I wanted to store 384-dimensional embeddings alongside the code nodes. Natural instinct: add an `embedding FLOAT[384]` column to the existing `CodeNode` table, bulk load the graph, then `UPDATE` each node with its embedding. ```cypher -- Seemed reasonable, right? @@ -596,7 +596,7 @@ Worked fine for ~20 nodes. Exploded at ~1000 nodes with: Buffer manager exception: Unable to allocate memory! The buffer pool is full! ``` -We had a 512MB buffer pool. 1000 embeddings ร— 384 floats ร— 4 bytes = ~1.5MB. Where did 512MB go? +I configured a 512MB buffer pool. 1000 embeddings ร— 384 floats ร— 4 bytes = ~1.5MB. Where did 512MB go? **Answer: Copy-on-Write (COW).** @@ -647,13 +647,13 @@ flowchart TD New -->|"INSERT into lightweight table"| WIN[Works at scale] ``` -Now we: +Now the process is: 1. Bulk load `CodeNode` (no embedding column) 2. `CREATE` rows in `CodeEmbedding` table (just `nodeId` + `embedding`) 3. Vector index lives on `CodeEmbedding` 4. Semantic search JOINs back to `CodeNode` for metadata -**Trade-off:** Every semantic search needs a JOIN. But it's a primary key lookup (O(1)), so we're talking ~1-5ms extra per query. Totally worth it to not explode at 1000 nodes. +**Trade-off:** Every semantic search needs a JOIN. But it's a primary key lookup (O(1)), so it's only ~1-5ms extra per query. Totally worth it to not explode at 1000 nodes. ### Lessons Learned diff --git a/src/components/DropZone.tsx b/src/components/DropZone.tsx index fea369299b..ad286adfc0 100644 --- a/src/components/DropZone.tsx +++ b/src/components/DropZone.tsx @@ -330,7 +330,7 @@ export const DropZone = ({ onFileSelect, onGitClone }: DropZoneProps) => { {/* Security note */} {githubToken && (

- 🔒 Token stays in your browser only, never sent to our servers + 🔒 Token stays in your browser only, never sent to any server

)} diff --git a/src/core/ingestion/ast-cache.ts b/src/core/ingestion/ast-cache.ts index cd2244eefd..61775416a3 100644 --- a/src/core/ingestion/ast-cache.ts +++ b/src/core/ingestion/ast-cache.ts @@ -1,7 +1,7 @@ import { LRUCache } from 'lru-cache'; import Parser from 'web-tree-sitter'; -// Define the interface for our Cache +// Define the interface for the Cache export interface ASTCache { get: (filePath: string) => Parser.Tree | undefined; set: (filePath: string, tree: Parser.Tree) => void; diff --git a/src/core/ingestion/call-processor.ts b/src/core/ingestion/call-processor.ts index 024e89599a..0706767aa5 100644 --- a/src/core/ingestion/call-processor.ts +++ b/src/core/ingestion/call-processor.ts @@ -90,7 +90,7 @@ export const processCalls = async ( }); }); - // Cleanup if we re-parsed + // Cleanup if re-parsed if (wasReparsed) { tree.delete(); } @@ -133,7 +133,7 @@ const resolveCallTarget = ( /** * Filter out common built-in functions and noise - * that we don't want to track as calls + * that shouldn't be tracked as calls */ const isBuiltInOrNoise = (name: string): boolean => { const builtIns = new Set([ diff --git a/src/core/ingestion/import-processor.ts b/src/core/ingestion/import-processor.ts index 024fd0f9b1..77fba9a801 100644 --- a/src/core/ingestion/import-processor.ts +++ b/src/core/ingestion/import-processor.ts @@ -103,7 +103,7 @@ export const processImports = async ( // Clean path (remove quotes) const rawImportPath = sourceNode.text.replace(/['"]/g, ''); - // Resolve to actual file in our system + // Resolve to actual file in the system const resolvedPath = resolveImportPath(file.path, rawImportPath, allFilePaths); if (resolvedPath) { @@ -129,7 +129,7 @@ export const processImports = async ( } }); - // If we re-parsed just for this, delete the tree to save memory + // If re-parsed just for this, delete the tree to save memory if (wasReparsed) { tree.delete(); } diff --git a/src/core/ingestion/symbol-table.ts b/src/core/ingestion/symbol-table.ts index 
99e8ffdbfc..c8c35d56f9 100644 --- a/src/core/ingestion/symbol-table.ts +++ b/src/core/ingestion/symbol-table.ts @@ -23,7 +23,7 @@ export interface SymbolTable { lookupFuzzy: (name: string) => SymbolDefinition[]; /** - * Debugging: See how many symbols we have tracked + * Debugging: See how many symbols are tracked */ getStats: () => { fileCount: number; globalSymbolCount: number }; diff --git a/src/core/kuzu/csv-generator.ts b/src/core/kuzu/csv-generator.ts index 4e4414a8c1..a3bdea3ad9 100644 --- a/src/core/kuzu/csv-generator.ts +++ b/src/core/kuzu/csv-generator.ts @@ -1,7 +1,7 @@ /** * CSV Generator for KuzuDB * - * Converts our in-memory KnowledgeGraph into CSV format + * Converts the in-memory KnowledgeGraph into CSV format * for bulk loading into KuzuDB. * * RFC 4180 Compliant: diff --git a/src/core/kuzu/kuzu-adapter.ts b/src/core/kuzu/kuzu-adapter.ts index eea3710d54..f470031f27 100644 --- a/src/core/kuzu/kuzu-adapter.ts +++ b/src/core/kuzu/kuzu-adapter.ts @@ -88,7 +88,7 @@ export const loadGraphToKuzu = async ( await fs.writeFile(edgesPath, edgesCSV); - // Use HEADER=true because our CSV generator adds headers + // Use HEADER=true because the CSV generator adds headers // Use PARALLEL=false because content field has quoted newlines // Explicitly list columns since CSV doesn't include 'embedding' (populated later via UPDATE) await conn.query(`COPY ${NODE_TABLE_NAME}(id, label, name, filePath, startLine, endLine, content) FROM "${nodesPath}" (HEADER=true, PARALLEL=false)`); diff --git a/src/core/tree-sitter/parser-loader.ts b/src/core/tree-sitter/parser-loader.ts index 4a45f69005..e6092f8b6d 100644 --- a/src/core/tree-sitter/parser-loader.ts +++ b/src/core/tree-sitter/parser-loader.ts @@ -3,7 +3,7 @@ import { SupportedLanguages } from '../../config/supported-languages'; let parser: Parser | null = null; -// Cache the compiled Language objects so we never fetch/compile twice +// Cache the compiled Language objects to avoid fetching/compiling twice const 
languageCache = new Map(); export const loadParser = async (): Promise => { diff --git a/src/hooks/useSigma.ts b/src/hooks/useSigma.ts index ae93848e87..e2e923dd90 100644 --- a/src/hooks/useSigma.ts +++ b/src/hooks/useSigma.ts @@ -71,7 +71,7 @@ interface UseSigmaReturn { refreshHighlights: () => void; } -// Noverlap for final cleanup - minimal since we start with good positions +// Noverlap for final cleanup - minimal since it starts with good positions const NOVERLAP_SETTINGS = { maxIterations: 20, // Reduced - less cleanup needed ratio: 1.1, diff --git a/src/lib/graph-adapter.ts b/src/lib/graph-adapter.ts index e7fc744e04..c23d6baa52 100644 --- a/src/lib/graph-adapter.ts +++ b/src/lib/graph-adapter.ts @@ -33,7 +33,7 @@ export interface SigmaEdgeAttributes { */ const getScaledNodeSize = (baseSize: number, nodeCount: number): number => { // Scale factor decreases as graph gets larger - // But we use a minimum that preserves relative differences + // But a minimum is used that preserves relative differences if (nodeCount > 50000) return Math.max(1, baseSize * 0.4); if (nodeCount > 20000) return Math.max(1.5, baseSize * 0.5); if (nodeCount > 5000) return Math.max(2, baseSize * 0.65); @@ -72,7 +72,7 @@ const getNodeMass = (nodeType: NodeLabel, nodeCount: number): number => { }; /** - * Converts our KnowledgeGraph to a graphology Graph for Sigma.js + * Converts the KnowledgeGraph to a graphology Graph for Sigma.js * Folders are positioned in a wide spread, children positioned NEAR their parents */ export const knowledgeGraphToGraphology = ( @@ -208,7 +208,7 @@ export const knowledgeGraphToGraphology = ( if (!visited.has(childId)) { visited.add(childId); addNodeWithPosition(childId); - queue.push(childId); // Add to queue so we process ITS children too + queue.push(childId); // Add to queue so its children are processed too } } } diff --git a/src/services/git-clone.ts b/src/services/git-clone.ts index 8dcec1b506..7924462521 100644 --- a/src/services/git-clone.ts +++ 
b/src/services/git-clone.ts @@ -17,11 +17,11 @@ const initFS = () => { return fsName; }; -// Use public proxy in development, our own proxy in production +// Use public proxy in development, a custom proxy in production const USE_OWN_PROXY = !import.meta.env.DEV; /** - * Custom HTTP client that uses our query-param based proxy in production + * Custom HTTP client that uses a query-param based proxy in production * isomorphic-git's default corsProxy appends URL as path, which doesn't work * well with Vercel's file-based routing. */ @@ -31,10 +31,10 @@ const createProxiedHttp = (): typeof http => { return http; } - // In production, wrap the HTTP client to use our proxy + // In production, wrap the HTTP client to use the custom proxy return { request: async (config) => { - // Rewrite the URL to go through our proxy + // Rewrite the URL to go through the proxy const proxyUrl = `/api/proxy?url=${encodeURIComponent(config.url)}`; // Call the original http.request with the proxied URL diff --git a/src/workers/ingestion.worker.ts b/src/workers/ingestion.worker.ts index c281fbaff0..d56643acc4 100644 --- a/src/workers/ingestion.worker.ts +++ b/src/workers/ingestion.worker.ts @@ -28,7 +28,7 @@ let isEmbeddingComplete = false; * Worker API exposed via Comlink * * Note: The onProgress callback is passed as a Comlink.proxy() from the main thread, - * allowing us to call it from the worker and have it execute on the main thread. + * allowing it to be called from the worker and have it execute on the main thread. 
*/ const workerApi = { /** From 0d4092307c67f06e73e63bda3397c605fb90ac1a Mon Sep 17 00:00:00 2001 From: abhigyanpatwari Date: Mon, 5 Jan 2026 23:03:41 +0530 Subject: [PATCH 4/6] fixed signin popup from proxy server when pvt repo was cloned without PAT --- api/proxy.ts | 11 +- src/components/DropZone.tsx | 10 +- src/components/EmbeddingStatus.tsx | 124 +++++++++++++------- src/components/WebGPUFallbackDialog.tsx | 149 ++++++++++++++++++++++++ src/core/embeddings/embedder.ts | 122 +++++++++++++++---- src/core/embeddings/types.ts | 8 +- src/hooks/useAppState.tsx | 16 ++- src/workers/ingestion.worker.ts | 11 +- 8 files changed, 370 insertions(+), 81 deletions(-) create mode 100644 src/components/WebGPUFallbackDialog.tsx diff --git a/api/proxy.ts b/api/proxy.ts index f59a73e33e..76098144c0 100644 --- a/api/proxy.ts +++ b/api/proxy.ts @@ -78,9 +78,16 @@ export default async function handler(req: VercelRequest, res: VercelResponse) { res.setHeader('Access-Control-Allow-Origin', '*'); res.setHeader('Access-Control-Expose-Headers', '*'); - // Forward response headers + // Forward response headers (except ones that cause issues) + const skipHeaders = [ + 'content-encoding', + 'transfer-encoding', + 'connection', + 'www-authenticate', // IMPORTANT: Strip this to prevent browser's native auth popup! + ]; + response.headers.forEach((value, key) => { - if (!['content-encoding', 'transfer-encoding', 'connection'].includes(key.toLowerCase())) { + if (!skipHeaders.includes(key.toLowerCase())) { res.setHeader(key, value); } }); diff --git a/src/components/DropZone.tsx b/src/components/DropZone.tsx index ad286adfc0..b89dbdfbe8 100644 --- a/src/components/DropZone.tsx +++ b/src/components/DropZone.tsx @@ -91,8 +91,14 @@ export const DropZone = ({ onFileSelect, onGitClone }: DropZoneProps) => { console.error('Clone failed:', err); const message = err instanceof Error ? 
err.message : 'Failed to clone repository'; // Provide helpful error for auth failures - if (message.includes('401') || message.includes('403')) { - setError('Authentication failed. Check your token or ensure the repo is accessible.'); + if (message.includes('401') || message.includes('403') || message.includes('Authentication')) { + if (!githubToken) { + setError('๐Ÿ”’ This looks like a private repo. Add a GitHub PAT (Personal Access Token) to access it.'); + } else { + setError('๐Ÿ”‘ Authentication failed. Check your token permissions (needs repo access).'); + } + } else if (message.includes('404') || message.includes('not found')) { + setError('Repository not found. Check the URL or it might be private (needs PAT).'); } else { setError(message); } diff --git a/src/components/EmbeddingStatus.tsx b/src/components/EmbeddingStatus.tsx index f7e2bed2a8..2af94c2841 100644 --- a/src/components/EmbeddingStatus.tsx +++ b/src/components/EmbeddingStatus.tsx @@ -1,6 +1,7 @@ import { Brain, Loader2, Check, AlertCircle, Zap, FlaskConical } from 'lucide-react'; import { useAppState } from '../hooks/useAppState'; import { useState } from 'react'; +import { WebGPUFallbackDialog } from './WebGPUFallbackDialog'; /** * Embedding status indicator and trigger button @@ -17,17 +18,36 @@ export const EmbeddingStatus = () => { } = useAppState(); const [testResult, setTestResult] = useState(null); + const [showFallbackDialog, setShowFallbackDialog] = useState(false); // Only show when exploring a loaded graph if (viewMode !== 'exploring' || !graph) return null; - const handleStartEmbeddings = async () => { + const nodeCount = graph.nodes.length; + + const handleStartEmbeddings = async (forceDevice?: 'webgpu' | 'wasm') => { try { - await startEmbeddings(); - } catch (error) { - console.error('Embedding failed:', error); + await startEmbeddings(forceDevice); + } catch (error: any) { + // Check if it's a WebGPU not available error + if (error?.name === 'WebGPUNotAvailableError' || + 
error?.message?.includes('WebGPU not available')) { + setShowFallbackDialog(true); + } else { + console.error('Embedding failed:', error); + } } }; + + const handleUseCPU = () => { + setShowFallbackDialog(false); + handleStartEmbeddings('wasm'); + }; + + const handleSkipEmbeddings = () => { + setShowFallbackDialog(false); + // Just close - user can try again later if they want + }; const handleTestArrayParams = async () => { setTestResult('Testing...'); @@ -41,32 +61,46 @@ export const EmbeddingStatus = () => { } }; + // WebGPU fallback dialog - rendered independently of state + const fallbackDialog = ( + setShowFallbackDialog(false)} + onUseCPU={handleUseCPU} + onSkip={handleSkipEmbeddings} + nodeCount={nodeCount} + /> + ); + // Idle state - show button to start if (embeddingStatus === 'idle') { return ( -
- {/* Test button (dev only) */} - {import.meta.env.DEV && ( + <> +
+ {/* Test button (dev only) */} + {import.meta.env.DEV && ( + + )} + - )} - - -
+
+ {fallbackDialog} + ); } @@ -74,18 +108,21 @@ export const EmbeddingStatus = () => { if (embeddingStatus === 'loading') { const downloadPercent = embeddingProgress?.modelDownloadPercent ?? 0; return ( -
- -
- Loading AI model... -
-
+ <> +
+ +
+ Loading AI model... +
+
+
-
+ {fallbackDialog} + ); } @@ -139,14 +176,17 @@ export const EmbeddingStatus = () => { // Error if (embeddingStatus === 'error') { return ( - + <> + + {fallbackDialog} + ); } diff --git a/src/components/WebGPUFallbackDialog.tsx b/src/components/WebGPUFallbackDialog.tsx new file mode 100644 index 0000000000..8bb28d9c45 --- /dev/null +++ b/src/components/WebGPUFallbackDialog.tsx @@ -0,0 +1,149 @@ +import { useState, useEffect } from 'react'; +import { X, Snail, Rocket, SkipForward } from 'lucide-react'; + +interface WebGPUFallbackDialogProps { + isOpen: boolean; + onClose: () => void; + onUseCPU: () => void; + onSkip: () => void; + nodeCount: number; +} + +/** + * Fun dialog shown when WebGPU isn't available + * Lets user choose: CPU fallback (slow) or skip embeddings + */ +export const WebGPUFallbackDialog = ({ + isOpen, + onClose, + onUseCPU, + onSkip, + nodeCount, +}: WebGPUFallbackDialogProps) => { + const [isAnimating, setIsAnimating] = useState(true); + const [isVisible, setIsVisible] = useState(false); + + useEffect(() => { + if (isOpen) { + // Trigger animation after mount + requestAnimationFrame(() => setIsVisible(true)); + } else { + setIsVisible(false); + } + }, [isOpen]); + + if (!isOpen) return null; + + // Estimate time based on node count (rough: ~50ms per node on CPU) + const estimatedMinutes = Math.ceil((nodeCount * 50) / 60000); + const isSmallCodebase = nodeCount < 200; + + return ( +
+ {/* Backdrop */} +
+ + {/* Dialog */} +
+ {/* Header with scratching emoji */} +
+ + +
+ {/* Animated emoji */} +
setIsAnimating(false)} + onClick={() => setIsAnimating(true)} + > + ๐Ÿค” +
+
+

+ WebGPU said "nope" +

+

+ Your browser doesn't support GPU acceleration +

+
+
+
+ + {/* Content */} +
+

+ Couldn't create embeddings with WebGPU, so semantic search (Graph RAG) + won't be as smart. The graph still works fine though! +

+ +
+

+ Your options: +

+
    +
  • + + + Use CPU โ€” Works but {isSmallCodebase ? 'a bit' : 'way'} slower + {nodeCount > 0 && ( + (~{estimatedMinutes} min for {nodeCount} nodes) + )} + +
  • +
  • + + + Skip it — Graph works, just no AI semantic search + +
  • +
+
+ + {isSmallCodebase && ( +

+ + Small codebase detected! CPU should be fine. +

+ )} + +

+ 💡 Tip: Try Chrome or Edge for WebGPU support

+
+ + {/* Actions */} +
+ + +
+
+
+ ); +}; + diff --git a/src/core/embeddings/embedder.ts b/src/core/embeddings/embedder.ts index 3744d0c6ce..81a09feddb 100644 --- a/src/core/embeddings/embedder.ts +++ b/src/core/embeddings/embedder.ts @@ -14,23 +14,66 @@ import { DEFAULT_EMBEDDING_CONFIG, type EmbeddingConfig, type ModelProgress } fr let embedderInstance: FeatureExtractionPipeline | null = null; let isInitializing = false; let initPromise: Promise | null = null; +let currentDevice: 'webgpu' | 'wasm' | null = null; /** * Progress callback type for model loading */ export type ModelProgressCallback = (progress: ModelProgress) => void; +/** + * Custom error thrown when WebGPU is not available + * Allows UI to prompt user for fallback choice + */ +export class WebGPUNotAvailableError extends Error { + constructor(originalError?: Error) { + super('WebGPU not available in this browser'); + this.name = 'WebGPUNotAvailableError'; + this.cause = originalError; + } +} + +/** + * Check if WebGPU is available in this browser + * Quick check without loading the model + */ +export const checkWebGPUAvailability = async (): Promise => { + try { + if (!navigator.gpu) { + return false; + } + const adapter = await navigator.gpu.requestAdapter(); + if (!adapter) { + return false; + } + // Try to get a device - this is where it usually fails + const device = await adapter.requestDevice(); + device.destroy(); // Clean up + return true; + } catch { + return false; + } +}; + +/** + * Get the current device being used for inference + */ +export const getCurrentDevice = (): 'webgpu' | 'wasm' | null => currentDevice; + /** * Initialize the embedding model * Uses singleton pattern - only loads once, subsequent calls return cached instance * * @param onProgress - Optional callback for model download progress * @param config - Optional configuration override + * @param forceDevice - Force a specific device (bypasses WebGPU check) * @returns Promise resolving to the embedder pipeline + * @throws WebGPUNotAvailableError if 
WebGPU is requested but unavailable */ export const initEmbedder = async ( onProgress?: ModelProgressCallback, - config: Partial = {} + config: Partial = {}, + forceDevice?: 'webgpu' | 'wasm' ): Promise => { // Return existing instance if available if (embedderInstance) { @@ -45,6 +88,7 @@ export const initEmbedder = async ( isInitializing = true; const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config }; + const requestedDevice = forceDevice || finalConfig.device; initPromise = (async () => { try { @@ -55,8 +99,6 @@ export const initEmbedder = async ( console.log(`๐Ÿง  Loading embedding model: ${finalConfig.modelId}`); } - // Create the feature extraction pipeline - // Try WebGPU first for speed, fallback to WASM for compatibility const progressCallback = onProgress ? (data: any) => { const progress: ModelProgress = { status: data.status || 'progress', @@ -68,52 +110,84 @@ export const initEmbedder = async ( onProgress(progress); } : undefined; - // Try preferred device first, fallback to wasm if it fails - const devicesToTry: Array<'webgpu' | 'wasm'> = - finalConfig.device === 'webgpu' ? 
['webgpu', 'wasm'] : ['wasm']; - - let lastError: Error | null = null; - - for (const device of devicesToTry) { + // If WebGPU is requested (default), check availability first + if (requestedDevice === 'webgpu') { + if (import.meta.env.DEV) { + console.log('๐Ÿ”ง Checking WebGPU availability...'); + } + + const webgpuAvailable = await checkWebGPUAvailability(); + + if (!webgpuAvailable) { + if (import.meta.env.DEV) { + console.warn('โš ๏ธ WebGPU not available'); + } + isInitializing = false; + initPromise = null; + throw new WebGPUNotAvailableError(); + } + + // Try WebGPU try { - if (import.meta.env.DEV && devicesToTry.length > 1) { - console.log(`๐Ÿ”ง Trying ${device} backend...`); + if (import.meta.env.DEV) { + console.log('๐Ÿ”ง Initializing WebGPU backend...'); } embedderInstance = await pipeline( 'feature-extraction', finalConfig.modelId, { - device, + device: 'webgpu', dtype: 'fp32', progress_callback: progressCallback, } ); + currentDevice = 'webgpu'; if (import.meta.env.DEV) { - console.log(`โœ… Using ${device} backend`); + console.log('โœ… Using WebGPU backend'); } - break; // Success! 
- } catch (err) { - lastError = err as Error; if (import.meta.env.DEV) { - console.warn(`โš ๏ธ ${device} backend failed:`, err); + console.warn('โš ๏ธ WebGPU initialization failed:', err); } - // Continue to next device + isInitializing = false; + initPromise = null; + embedderInstance = null; + throw new WebGPUNotAvailableError(err as Error); + } + } else { + // WASM mode requested (user chose fallback) + if (import.meta.env.DEV) { + console.log('๐Ÿ”ง Initializing WASM backend (this will be slower)...'); + } + + embedderInstance = await pipeline( + 'feature-extraction', + finalConfig.modelId, + { + device: 'wasm', // WASM-based CPU execution + dtype: 'fp32', + progress_callback: progressCallback, + } + ); + currentDevice = 'wasm'; + + if (import.meta.env.DEV) { + console.log('โœ… Using WASM backend'); } - } - - if (!embedderInstance) { - throw lastError || new Error('No backend available'); } if (import.meta.env.DEV) { console.log('โœ… Embedding model loaded successfully'); } - return embedderInstance; + return embedderInstance!; } catch (error) { + // Re-throw WebGPUNotAvailableError as-is + if (error instanceof WebGPUNotAvailableError) { + throw error; + } isInitializing = false; initPromise = null; embedderInstance = null; diff --git a/src/core/embeddings/types.ts b/src/core/embeddings/types.ts index 314996660b..e4a04222b2 100644 --- a/src/core/embeddings/types.ts +++ b/src/core/embeddings/types.ts @@ -59,8 +59,8 @@ export interface EmbeddingConfig { batchSize: number; /** Embedding vector dimensions */ dimensions: number; - /** Device to use for inference */ - device: 'webgpu' | 'wasm' | 'cpu'; + /** Device to use for inference: 'webgpu' for GPU acceleration, 'wasm' for WASM-based CPU */ + device: 'webgpu' | 'wasm'; /** Maximum characters of code snippet to include */ maxSnippetLength: number; } @@ -68,13 +68,13 @@ export interface EmbeddingConfig { /** * Default embedding configuration * Uses snowflake-arctic-embed-xs for browser efficiency - * Tries WebGPU 
first (fast), auto-fallback to WASM if unavailable + * Tries WebGPU first (fast), user can choose WASM fallback if unavailable */ export const DEFAULT_EMBEDDING_CONFIG: EmbeddingConfig = { modelId: 'Snowflake/snowflake-arctic-embed-xs', batchSize: 16, dimensions: 384, - device: 'webgpu', // Try WebGPU first, auto-fallback to WASM + device: 'webgpu', // WebGPU preferred, WASM fallback available if user chooses maxSnippetLength: 500, }; diff --git a/src/hooks/useAppState.tsx b/src/hooks/useAppState.tsx index 59570932ad..6336e7a47c 100644 --- a/src/hooks/useAppState.tsx +++ b/src/hooks/useAppState.tsx @@ -75,7 +75,7 @@ interface AppState { embeddingProgress: EmbeddingProgress | null; // Embedding methods - startEmbeddings: () => Promise; + startEmbeddings: (forceDevice?: 'webgpu' | 'wasm') => Promise; semanticSearch: (query: string, k?: number) => Promise; semanticSearchWithContext: (query: string, k?: number, hops?: number) => Promise; isEmbeddingReady: boolean; @@ -197,7 +197,7 @@ export const AppStateProvider = ({ children }: { children: ReactNode }) => { }, []); // Embedding methods - const startEmbeddings = useCallback(async (): Promise => { + const startEmbeddings = useCallback(async (forceDevice?: 'webgpu' | 'wasm'): Promise => { const api = apiRef.current; if (!api) throw new Error('Worker not initialized'); @@ -228,9 +228,15 @@ export const AppStateProvider = ({ children }: { children: ReactNode }) => { } }); - await api.startEmbeddingPipeline(proxiedOnProgress); - } catch (error) { - setEmbeddingStatus('error'); + await api.startEmbeddingPipeline(proxiedOnProgress, forceDevice); + } catch (error: any) { + // Check if it's WebGPU not available - let caller handle the dialog + if (error?.name === 'WebGPUNotAvailableError' || + error?.message?.includes('WebGPU not available')) { + setEmbeddingStatus('idle'); // Reset to idle so user can try again + } else { + setEmbeddingStatus('error'); + } throw error; } }, []); diff --git a/src/workers/ingestion.worker.ts 
b/src/workers/ingestion.worker.ts index d56643acc4..1c6ccb86c6 100644 --- a/src/workers/ingestion.worker.ts +++ b/src/workers/ingestion.worker.ts @@ -166,9 +166,11 @@ const workerApi = { * Start the embedding pipeline in the background * Generates embeddings for all embeddable nodes and creates vector index * @param onProgress - Proxied callback for embedding progress updates + * @param forceDevice - Force a specific device ('webgpu' or 'wasm') */ async startEmbeddingPipeline( - onProgress: (progress: EmbeddingProgress) => void + onProgress: (progress: EmbeddingProgress) => void, + forceDevice?: 'webgpu' | 'wasm' ): Promise { const kuzu = await getKuzuAdapter(); if (!kuzu.isKuzuReady()) { @@ -187,7 +189,12 @@ const workerApi = { onProgress(progress); }; - await runEmbeddingPipeline(kuzu.executeQuery, kuzu.executeWithReusedStatement, progressCallback); + await runEmbeddingPipeline( + kuzu.executeQuery, + kuzu.executeWithReusedStatement, + progressCallback, + forceDevice ? { device: forceDevice } : {} + ); }, /** From 5e48f0a5664c4af39660839d05b7834cb863f1e7 Mon Sep 17 00:00:00 2001 From: abhigyanpatwari Date: Mon, 5 Jan 2026 23:20:24 +0530 Subject: [PATCH 5/6] skipLibCheck (in tsconfig.app.json) set to true to fix build fails --- tsconfig.app.json | 1 + 1 file changed, 1 insertion(+) diff --git a/tsconfig.app.json b/tsconfig.app.json index ad93a88e21..f429ad3ea2 100644 --- a/tsconfig.app.json +++ b/tsconfig.app.json @@ -13,6 +13,7 @@ "esModuleInterop": true, "allowSyntheticDefaultImports": true, "forceConsistentCasingInFileNames": true, + "skipLibCheck": true, "baseUrl": "./", "paths": { "@/*": ["./src/*"] From 06fe177f590a99b82a5775e1bbfec4a01c66dbd5 Mon Sep 17 00:00:00 2001 From: abhigyanpatwari Date: Mon, 5 Jan 2026 23:28:28 +0530 Subject: [PATCH 6/6] set navigator and pipeline to any to bypass build issue. 
Its issue is originating from node modules, not my code so its fine :-), I hope --- src/core/embeddings/embedder.ts | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/core/embeddings/embedder.ts b/src/core/embeddings/embedder.ts index 81a09feddb..1188945835 100644 --- a/src/core/embeddings/embedder.ts +++ b/src/core/embeddings/embedder.ts @@ -39,10 +39,12 @@ export class WebGPUNotAvailableError extends Error { */ export const checkWebGPUAvailability = async (): Promise => { try { - if (!navigator.gpu) { + // Cast to any to avoid WebGPU types not being available in all TS configs + const nav = navigator as any; + if (!nav.gpu) { return false; } - const adapter = await navigator.gpu.requestAdapter(); + const adapter = await nav.gpu.requestAdapter(); if (!adapter) { return false; } @@ -133,7 +135,8 @@ export const initEmbedder = async ( console.log('๐Ÿ”ง Initializing WebGPU backend...'); } - embedderInstance = await pipeline( + // Type assertion needed due to complex union types in transformers.js + embedderInstance = await (pipeline as any)( 'feature-extraction', finalConfig.modelId, { @@ -162,7 +165,8 @@ export const initEmbedder = async ( console.log('๐Ÿ”ง Initializing WASM backend (this will be slower)...'); } - embedderInstance = await pipeline( + // Type assertion needed due to complex union types in transformers.js + embedderInstance = await (pipeline as any)( 'feature-extraction', finalConfig.modelId, {