From cbc2110684db8860e21d0afa8efd7c41d18b2d79 Mon Sep 17 00:00:00 2001
From: Simon Iribarren <simon.ig13@gmail.com>
Date: Thu, 5 Feb 2026 21:45:49 +0100
Subject: [PATCH] feat(qvac-registry-schema): add findBy() method to
 RegistryDatabase

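Add a unified findBy(params) method that:
- Selects the most efficient HyperDB index based on provided params
- Supports filtering by name, engine, and quantization
- Applies additional filters in memory for multi-field queries
- Optionally includes deprecated models

Also register a compound models-by-engine-quantization index (schema change)
and expose it directly through findModelsByEngineQuantization().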

This enables clients to use a single method for model queries instead of
manually choosing between findModelsByEngine/Name/Quantization.
---
 .../scripts/build-db-spec.js                  |   7 +
 .../qvac-lib-registry-server/shared/db.js     |  41 ++++
 .../shared/spec/hyperdb/db.json               |  16 +-
 .../shared/spec/hyperdb/index.js              |  59 ++++-
 .../unit/registry-database.findby.test.js     | 225 ++++++++++++++++++
 5 files changed, 346 insertions(+), 2 deletions(-)
 create mode 100644 packages/qvac-lib-registry-server/tests/unit/registry-database.findby.test.js

diff --git a/packages/qvac-lib-registry-server/scripts/build-db-spec.js b/packages/qvac-lib-registry-server/scripts/build-db-spec.js
index 77b3960f88..34a29a09ff 100644
--- a/packages/qvac-lib-registry-server/scripts/build-db-spec.js
+++ b/packages/qvac-lib-registry-server/scripts/build-db-spec.js
@@ -64,6 +64,13 @@ registryDB.indexes.register({
   key: ['quantization']
 })
 
+registryDB.indexes.register({
+  name: 'models-by-engine-quantization',
+  collection: `@${QVAC_MAIN_REGISTRY}/model`,
+  unique: false, // several models may share one engine + quantization pair
+  key: ['engine', 'quantization'] // engine leads, so engine-only range queries can use this index too
+})
+
 HyperDBBuilder.toDisk(db)
 
 const dispatch = Hyperdispatch.from(SCHEMA_DIR, DISPATCH_DIR)
diff --git a/packages/qvac-lib-registry-server/shared/db.js b/packages/qvac-lib-registry-server/shared/db.js
index 72cd4e9a08..61dfe2c609 100644
--- a/packages/qvac-lib-registry-server/shared/db.js
+++ b/packages/qvac-lib-registry-server/shared/db.js
@@ -75,6 +75,47 @@ class RegistryDatabase extends ReadyResource {
     return this.db.find(`@${QVAC_MAIN_REGISTRY}/models-by-quantization`, query)
   }
 
+  findModelsByEngineQuantization (query = {}) { // range query ({ gte, lte, ... }) on the compound index; returns db.find()'s result as-is
+    return this.db.find(`@${QVAC_MAIN_REGISTRY}/models-by-engine-quantization`, query)
+  }
+
+  // Resolves to an array of models. Queries one index picked from the filters (engine,
+  // else quantization, else name, else full scan), applies a leftover name filter in
+  // memory, and omits deprecated records unless includeDeprecated is truthy.
+  async findBy (params = {}) {
+    if (!this.opened) await this.ready()
+
+    const { name, engine, quantization, includeDeprecated = false } = params
+    let models
+
+    if (engine) {
+      const query = { gte: { engine }, lte: { engine } }
+      if (quantization) {
+        query.gte.quantization = quantization
+        query.lte.quantization = quantization + '\uffff'
+      }
+      models = await this.db.find(`@${QVAC_MAIN_REGISTRY}/models-by-engine-quantization`, query).toArray()
+    } else if (quantization) {
+      const query = { gte: { quantization }, lte: { quantization: quantization + '\uffff' } }
+      models = await this.db.find(`@${QVAC_MAIN_REGISTRY}/models-by-quantization`, query).toArray()
+    } else if (name) {
+      models = await this.db.find(`@${QVAC_MAIN_REGISTRY}/models-by-name`, { gte: name, lte: name + '\uffff' }).toArray()
+    } else {
+      models = await this.db.find(`@${QVAC_MAIN_REGISTRY}/model`, {}).toArray()
+    }
+
+    // Only the name-only branch uses the name index; with any other index the name
+    // must still be matched here (substring of the file name, case-insensitive).
+    if (name && (engine || quantization)) {
+      const needle = name.toLowerCase()
+      models = models.filter(m => m.path?.split('/').pop()?.toLowerCase().includes(needle))
+    }
+
+    if (!includeDeprecated) models = models.filter(m => !m.deprecated)
+
+    return models
+  }
+
   async putLicense (record) {
     if (!this.opened) await this.ready()
     const tx = this.db.transaction()
diff --git a/packages/qvac-lib-registry-server/shared/spec/hyperdb/db.json b/packages/qvac-lib-registry-server/shared/spec/hyperdb/db.json
index 1845ee7e7a..63438506af 100644
--- a/packages/qvac-lib-registry-server/shared/spec/hyperdb/db.json
+++ b/packages/qvac-lib-registry-server/shared/spec/hyperdb/db.json
@@ -23,7 +23,8 @@
       "indexes": [
         "@qvac-main-registry/models-by-engine",
         "@qvac-main-registry/models-by-name",
-        "@qvac-main-registry/models-by-quantization"
+        "@qvac-main-registry/models-by-quantization",
+        "@qvac-main-registry/models-by-engine-quantization"
       ],
       "schema": "@qvac-main-registry/model",
       "derived": false,
@@ -69,6 +70,19 @@
       "key": [
         "quantization"
       ]
+    },
+    {
+      "name": "models-by-engine-quantization",
+      "namespace": "qvac-main-registry",
+      "id": 5,
+      "type": 2,
+      "collection": "@qvac-main-registry/model",
+      "unique": false,
+      "deprecated": false,
+      "key": [
+        "engine",
+        "quantization"
+      ]
     }
   ]
 }
\ No newline at end of file
diff --git a/packages/qvac-lib-registry-server/shared/spec/hyperdb/index.js b/packages/qvac-lib-registry-server/shared/spec/hyperdb/index.js
index 99ee875d68..d6ce88a612 100644
--- a/packages/qvac-lib-registry-server/shared/spec/hyperdb/index.js
+++ b/packages/qvac-lib-registry-server/shared/spec/hyperdb/index.js
@@ -274,6 +274,61 @@ const index4 = {
 }
 collection1.indexes.push(index4)
 
+// '@qvac-main-registry/models-by-engine-quantization' collection key
+const index5_key = new IndexEncoder([
+  IndexEncoder.STRING,
+  IndexEncoder.STRING,
+  IndexEncoder.STRING,
+  IndexEncoder.STRING
+], { prefix: 5 })
+
+function index5_indexify (record) {
+  const arr = []
+
+  const a0 = record.engine
+  if (a0 === undefined) return arr
+  arr.push(a0)
+
+  const a1 = record.quantization
+  if (a1 === undefined) return arr
+  arr.push(a1)
+
+  const a2 = record.path
+  if (a2 === undefined) return arr
+  arr.push(a2)
+
+  const a3 = record.source
+  if (a3 === undefined) return arr
+  arr.push(a3)
+
+  return arr
+}
+
+// '@qvac-main-registry/models-by-engine-quantization'
+const index5 = {
+  name: '@qvac-main-registry/models-by-engine-quantization',
+  id: 5,
+  encodeKey (record) {
+    return index5_key.encode(index5_indexify(record))
+  },
+  encodeKeyRange ({ gt, lt, gte, lte } = {}) {
+    return index5_key.encodeRange({
+      gt: gt ? index5_indexify(gt) : null,
+      lt: lt ? index5_indexify(lt) : null,
+      gte: gte ? index5_indexify(gte) : null,
+      lte: lte ? index5_indexify(lte) : null
+    })
+  },
+  encodeValue: (doc) => index5.collection.encodeKey(doc),
+  encodeIndexKeys (record, context) {
+    return [index5_key.encode([record.engine, record.quantization, record.path, record.source])]
+  },
+  reconstruct: (keyBuf, valueBuf) => valueBuf,
+  offset: collection1.indexes.length,
+  collection: collection1
+}
+collection1.indexes.push(index5)
+
 const collections = [
   collection0,
   collection1
@@ -282,7 +337,8 @@ const collections = [
 const indexes = [
   index2,
   index3,
-  index4
+  index4,
+  index5
 ]
 
 module.exports = { version, collections, indexes, resolveCollection, resolveIndex }
@@ -300,6 +356,7 @@ function resolveIndex (name) {
     case '@qvac-main-registry/models-by-engine': return index2
     case '@qvac-main-registry/models-by-name': return index3
     case '@qvac-main-registry/models-by-quantization': return index4
+    case '@qvac-main-registry/models-by-engine-quantization': return index5
     default: return null
   }
 }
diff --git a/packages/qvac-lib-registry-server/tests/unit/registry-database.findby.test.js b/packages/qvac-lib-registry-server/tests/unit/registry-database.findby.test.js
new file mode 100644
index 0000000000..8570d2607f
--- /dev/null
+++ b/packages/qvac-lib-registry-server/tests/unit/registry-database.findby.test.js
@@ -0,0 +1,225 @@
+'use strict'
+
+const test = require('brittle')
+const Corestore = require('corestore')
+const tmp = require('test-tmp')
+const RegistryDatabase = require('../../shared/db')
+
+const TEST_MODELS = [
+  {
+    path: 'models/llama-3.2-1b-instruct-q4_k_m.gguf',
+    source: 'https://huggingface.co/example/llama-3.2',
+    engine: '@qvac/llm-llamacpp',
+    licenseId: 'Llama-3.2',
+    quantization: 'q4_k_m',
+    blobBinding: { coreKey: Buffer.alloc(32), blockOffset: 0, blockLength: 1, byteOffset: 0, byteLength: 100, sha256: 'aaa' }
+  },
+  {
+    path: 'models/llama-3.2-1b-instruct-q8_0.gguf',
+    source: 'https://huggingface.co/example/llama-3.2',
+    engine: '@qvac/llm-llamacpp',
+    licenseId: 'Llama-3.2',
+    quantization: 'q8_0',
+    blobBinding: { coreKey: Buffer.alloc(32), blockOffset: 0, blockLength: 1, byteOffset: 0, byteLength: 200, sha256: 'bbb' }
+  },
+  {
+    path: 'models/whisper-tiny-q5_1.bin',
+    source: 'https://huggingface.co/example/whisper-tiny',
+    engine: '@qvac/transcription-whispercpp',
+    licenseId: 'MIT',
+    quantization: 'q5_1',
+    blobBinding: { coreKey: Buffer.alloc(32), blockOffset: 0, blockLength: 1, byteOffset: 0, byteLength: 300, sha256: 'ccc' }
+  },
+  {
+    path: 'models/salamandrata-2b-q4_k_m.gguf',
+    source: 'https://huggingface.co/example/salamandrata',
+    engine: '@qvac/translation-llamacpp',
+    licenseId: 'Apache-2.0',
+    quantization: 'q4_k_m',
+    blobBinding: { coreKey: Buffer.alloc(32), blockOffset: 0, blockLength: 1, byteOffset: 0, byteLength: 400, sha256: 'ddd' }
+  },
+  {
+    path: 'models/deprecated-model.gguf',
+    source: 'https://huggingface.co/example/old-model',
+    engine: '@qvac/llm-llamacpp',
+    licenseId: 'MIT',
+    quantization: 'q4_k_m',
+    deprecated: true,
+    deprecatedAt: '2025-01-01T00:00:00.000Z',
+    deprecationReason: 'Superseded',
+    blobBinding: { coreKey: Buffer.alloc(32), blockOffset: 0, blockLength: 1, byteOffset: 0, byteLength: 50, sha256: 'eee' }
+  }
+]
+
+async function createDB (t) {
+  const storage = await tmp(t)
+  const store = new Corestore(storage)
+  await store.ready()
+
+  const core = store.get({ name: 'test-registry' })
+  await core.ready()
+
+  const db = new RegistryDatabase(core)
+  await db.ready()
+
+  for (const model of TEST_MODELS) {
+    await db.putModel(model)
+  }
+
+  return { db, store }
+}
+
+async function cleanup ({ db, store }) {
+  await db.close()
+  await store.close()
+}
+
+test('findBy() - no filters returns all non-deprecated models', async t => {
+  const ctx = await createDB(t)
+
+  try {
+    const models = await ctx.db.findBy()
+    t.is(models.length, 4, 'returns 4 non-deprecated models')
+    t.absent(models.find(m => m.deprecated), 'no deprecated models')
+  } finally {
+    await cleanup(ctx)
+  }
+})
+
+test('findBy() - includeDeprecated returns all models', async t => {
+  const ctx = await createDB(t)
+
+  try {
+    const models = await ctx.db.findBy({ includeDeprecated: true })
+    t.is(models.length, 5, 'returns all 5 models including deprecated')
+    t.ok(models.find(m => m.deprecated), 'includes deprecated model')
+  } finally {
+    await cleanup(ctx)
+  }
+})
+
+test('findBy({ engine }) - filters by engine', async t => {
+  const ctx = await createDB(t)
+
+  try {
+    const llmModels = await ctx.db.findBy({ engine: '@qvac/llm-llamacpp' })
+    t.is(llmModels.length, 2, 'returns 2 non-deprecated llm models')
+    t.ok(llmModels.every(m => m.engine === '@qvac/llm-llamacpp'), 'all have correct engine')
+
+    const whisperModels = await ctx.db.findBy({ engine: '@qvac/transcription-whispercpp' })
+    t.is(whisperModels.length, 1, 'returns 1 whisper model')
+    t.is(whisperModels[0].engine, '@qvac/transcription-whispercpp', 'correct engine')
+
+    const noModels = await ctx.db.findBy({ engine: '@qvac/nonexistent' })
+    t.is(noModels.length, 0, 'returns empty for unknown engine')
+  } finally {
+    await cleanup(ctx)
+  }
+})
+
+test('findBy({ engine }) - with includeDeprecated', async t => {
+  const ctx = await createDB(t)
+
+  try {
+    const models = await ctx.db.findBy({ engine: '@qvac/llm-llamacpp', includeDeprecated: true })
+    t.is(models.length, 3, 'returns 3 llm models including deprecated')
+  } finally {
+    await cleanup(ctx)
+  }
+})
+
+test('findBy({ quantization }) - filters by quantization', async t => {
+  const ctx = await createDB(t)
+
+  try {
+    const q4Models = await ctx.db.findBy({ quantization: 'q4_k_m' })
+    t.is(q4Models.length, 2, 'returns 2 non-deprecated q4_k_m models')
+    t.ok(q4Models.every(m => m.quantization?.toLowerCase().includes('q4_k_m')), 'all match quantization')
+
+    const q8Models = await ctx.db.findBy({ quantization: 'q8_0' })
+    t.is(q8Models.length, 1, 'returns 1 q8_0 model')
+  } finally {
+    await cleanup(ctx)
+  }
+})
+
+test('findBy({ engine, quantization }) - compound filter uses index', async t => {
+  const ctx = await createDB(t)
+
+  try {
+    const models = await ctx.db.findBy({ engine: '@qvac/llm-llamacpp', quantization: 'q4_k_m' })
+    t.is(models.length, 1, 'returns 1 non-deprecated llm + q4_k_m model')
+    t.is(models[0].engine, '@qvac/llm-llamacpp', 'correct engine')
+    t.is(models[0].quantization, 'q4_k_m', 'correct quantization')
+
+    const models2 = await ctx.db.findBy({ engine: '@qvac/llm-llamacpp', quantization: 'q8_0' })
+    t.is(models2.length, 1, 'returns 1 llm + q8_0 model')
+    t.is(models2[0].quantization, 'q8_0', 'correct quantization')
+
+    const noModels = await ctx.db.findBy({ engine: '@qvac/transcription-whispercpp', quantization: 'q4_k_m' })
+    t.is(noModels.length, 0, 'returns empty for non-matching combo')
+  } finally {
+    await cleanup(ctx)
+  }
+})
+
+test('findBy({ engine, quantization, includeDeprecated }) - compound with deprecated', async t => {
+  const ctx = await createDB(t)
+
+  try {
+    const models = await ctx.db.findBy({
+      engine: '@qvac/llm-llamacpp',
+      quantization: 'q4_k_m',
+      includeDeprecated: true
+    })
+    t.is(models.length, 2, 'returns 2 llm + q4_k_m models including deprecated')
+  } finally {
+    await cleanup(ctx)
+  }
+})
+
+test('findBy({ name }) - filters by name from path', async t => {
+  const ctx = await createDB(t)
+
+  try {
+    const models = await ctx.db.findBy({ name: 'llama' })
+    t.is(models.length, 2, 'returns 2 models matching llama')
+    t.ok(models.every(m => m.path.toLowerCase().includes('llama')), 'all paths contain llama')
+
+    const whisperModels = await ctx.db.findBy({ name: 'whisper' })
+    t.is(whisperModels.length, 1, 'returns 1 model matching whisper')
+
+    const noModels = await ctx.db.findBy({ name: 'nonexistent' })
+    t.is(noModels.length, 0, 'returns empty for no match')
+  } finally {
+    await cleanup(ctx)
+  }
+})
+
+test('findBy({ engine, name }) - engine index + name in-memory filter', async t => {
+  const ctx = await createDB(t)
+
+  try {
+    const models = await ctx.db.findBy({ engine: '@qvac/llm-llamacpp', name: 'q8' })
+    t.is(models.length, 1, 'returns 1 model matching llm engine + q8 name')
+    t.is(models[0].quantization, 'q8_0', 'correct model found')
+  } finally {
+    await cleanup(ctx)
+  }
+})
+
+test('findModelsByEngineQuantization() - direct compound index access', async t => {
+  const ctx = await createDB(t)
+
+  try {
+    const models = await ctx.db.findModelsByEngineQuantization({
+      gte: { engine: '@qvac/llm-llamacpp', quantization: 'q4_k_m' },
+      lte: { engine: '@qvac/llm-llamacpp', quantization: 'q4_k_m' }
+    }).toArray()
+
+    t.ok(models.length >= 1, 'returns models from compound index')
+    t.ok(models.every(m => m.engine === '@qvac/llm-llamacpp'), 'all have correct engine')
+  } finally {
+    await cleanup(ctx)
+  }
+})